mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-11 15:48:20 -05:00
Compare commits
56 Commits
al/div_mul
...
release/0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2bbcf6e5b3 | ||
|
|
0d7a88e640 | ||
|
|
77656cd055 | ||
|
|
8ce964cb18 | ||
|
|
4ea368d395 | ||
|
|
59b029e038 | ||
|
|
1403663033 | ||
|
|
0a307497cd | ||
|
|
0ce0567cef | ||
|
|
e9c19b419d | ||
|
|
5b653864b7 | ||
|
|
a1d189b415 | ||
|
|
c59434f183 | ||
|
|
83239e6afa | ||
|
|
ef8cb0273f | ||
|
|
9b353bac2d | ||
|
|
46d65f1f87 | ||
|
|
a63a2cb725 | ||
|
|
c45af05ec6 | ||
|
|
584eaeb4ed | ||
|
|
8d94ed2512 | ||
|
|
b8d9dbe85b | ||
|
|
ad25340c33 | ||
|
|
ad1ae0c8c2 | ||
|
|
ee40906b8b | ||
|
|
bf6b4cc541 | ||
|
|
24404567a4 | ||
|
|
052dd4a60e | ||
|
|
f8d829d076 | ||
|
|
d9761ca17e | ||
|
|
8d2e15347b | ||
|
|
a368257bc7 | ||
|
|
76d23d0c91 | ||
|
|
ddc5002232 | ||
|
|
c08c479616 | ||
|
|
f26afc16de | ||
|
|
13f533f6fb | ||
|
|
d9541e472b | ||
|
|
3453e45258 | ||
|
|
55de96f046 | ||
|
|
9747c06f6e | ||
|
|
00f72d2c13 | ||
|
|
01f5cb9056 | ||
|
|
d66e313fa4 | ||
|
|
c9d530e642 | ||
|
|
6c2096fe52 | ||
|
|
1e94134dda | ||
|
|
c76a60111c | ||
|
|
18ff400df2 | ||
|
|
3d31d09be5 | ||
|
|
76322606f2 | ||
|
|
bf58a9f0c6 | ||
|
|
64461c82b4 | ||
|
|
339c84fbd9 | ||
|
|
bc682a5ffb | ||
|
|
2920daf2d9 |
15
.github/workflows/aws_tfhe_fast_tests.yml
vendored
15
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -5,6 +5,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -24,13 +26,13 @@ on:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: 'Slab request ID'
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
fork_repo:
|
||||
description: 'Name of forked repo as user/repo'
|
||||
description: "Name of forked repo as user/repo"
|
||||
type: string
|
||||
fork_git_sha:
|
||||
description: 'Git SHA to checkout from fork'
|
||||
description: "Git SHA to checkout from fork"
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
@@ -61,10 +63,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: stable
|
||||
default: true
|
||||
|
||||
- name: Run concrete-csprng tests
|
||||
run: |
|
||||
@@ -114,6 +115,10 @@ jobs:
|
||||
run: |
|
||||
make test_safe_deserialization
|
||||
|
||||
- name: Run forward compatibility tests
|
||||
run: |
|
||||
make test_forward_compatibility
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ always() }}
|
||||
continue-on-error: true
|
||||
|
||||
15
.github/workflows/aws_tfhe_gpu_tests.yml
vendored
15
.github/workflows/aws_tfhe_gpu_tests.yml
vendored
@@ -5,6 +5,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -63,7 +65,7 @@ jobs:
|
||||
echo "Fork git sha: ${{ inputs.fork_git_sha }}"
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
with:
|
||||
repository: ${{ inputs.fork_repo }}
|
||||
ref: ${{ inputs.fork_git_sha }}
|
||||
@@ -73,10 +75,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: stable
|
||||
default: true
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
@@ -102,3 +103,11 @@ jobs:
|
||||
- name: Run all tests
|
||||
run: |
|
||||
make test_gpu
|
||||
|
||||
- name: Run user docs tests
|
||||
run: |
|
||||
make test_user_doc_gpu
|
||||
|
||||
- name: Test C API
|
||||
run: |
|
||||
make test_c_api_gpu
|
||||
|
||||
5
.github/workflows/aws_tfhe_integer_tests.yml
vendored
5
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -4,6 +4,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -60,10 +62,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: stable
|
||||
default: true
|
||||
|
||||
- name: Gen Keys if required
|
||||
run: |
|
||||
|
||||
@@ -4,6 +4,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -60,10 +62,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: stable
|
||||
default: true
|
||||
|
||||
- name: Gen Keys if required
|
||||
run: |
|
||||
|
||||
15
.github/workflows/aws_tfhe_tests.yml
vendored
15
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -4,6 +4,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -23,13 +25,13 @@ on:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: 'Slab request ID'
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
fork_repo:
|
||||
description: 'Name of forked repo as user/repo'
|
||||
description: "Name of forked repo as user/repo"
|
||||
type: string
|
||||
fork_git_sha:
|
||||
description: 'Git SHA to checkout from fork'
|
||||
description: "Git SHA to checkout from fork"
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
@@ -60,10 +62,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: stable
|
||||
default: true
|
||||
|
||||
- name: Run concrete-csprng tests
|
||||
run: |
|
||||
@@ -81,6 +82,10 @@ jobs:
|
||||
run: |
|
||||
make test_c_api
|
||||
|
||||
- name: Run C API tests with forward_compatibility
|
||||
run: |
|
||||
FORWARD_COMPAT=ON make test_c_api
|
||||
|
||||
- name: Run user docs tests
|
||||
run: |
|
||||
make test_user_doc
|
||||
|
||||
5
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
5
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -4,6 +4,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -60,10 +62,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: stable
|
||||
default: true
|
||||
|
||||
- name: Run js on wasm API tests
|
||||
run: |
|
||||
|
||||
7
.github/workflows/boolean_benchmark.yml
vendored
7
.github/workflows/boolean_benchmark.yml
vendored
@@ -32,6 +32,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-boolean-benchmarks:
|
||||
@@ -61,10 +63,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
@@ -96,7 +97,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_boolean
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
4
.github/workflows/cargo_build.yml
vendored
4
.github/workflows/cargo_build.yml
vendored
@@ -6,6 +6,8 @@ on:
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
@@ -17,7 +19,7 @@ jobs:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
os: [ubuntu-latest, macos-latest-large, windows-latest]
|
||||
fail-fast: false
|
||||
|
||||
steps:
|
||||
|
||||
9
.github/workflows/code_coverage.yml
vendored
9
.github/workflows/code_coverage.yml
vendored
@@ -4,6 +4,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -61,14 +63,13 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: stable
|
||||
default: true
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@62f4729b5df35e6e0e01265fa70a82ccaf196b4b
|
||||
uses: tj-actions/changed-files@90a06d6ba9543371ab4df8eeca0be07ca6054959
|
||||
with:
|
||||
files_yaml: |
|
||||
tfhe:
|
||||
@@ -98,7 +99,7 @@ jobs:
|
||||
make test_shortint_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d
|
||||
uses: codecov/codecov-action@4fe8c5f003fae66aa5ebb77cfd3e7bfbbda0b6b0
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
@@ -4,6 +4,7 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -52,10 +53,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: stable
|
||||
default: true
|
||||
|
||||
- name: Dieharder randomness test suite
|
||||
run: |
|
||||
|
||||
9
.github/workflows/integer_benchmark.yml
vendored
9
.github/workflows/integer_benchmark.yml
vendored
@@ -25,6 +25,8 @@ env:
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-integer-benchmarks:
|
||||
@@ -54,10 +56,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
@@ -69,7 +70,7 @@ jobs:
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
@@ -90,7 +91,7 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
7
.github/workflows/integer_full_benchmark.yml
vendored
7
.github/workflows/integer_full_benchmark.yml
vendored
@@ -28,6 +28,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
prepare-matrix:
|
||||
@@ -88,10 +90,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
@@ -118,7 +119,7 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
13
.github/workflows/integer_gpu_benchmark.yml
vendored
13
.github/workflows/integer_gpu_benchmark.yml
vendored
@@ -25,6 +25,8 @@ env:
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-integer-benchmarks:
|
||||
@@ -54,7 +56,7 @@ jobs:
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -64,10 +66,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
@@ -96,7 +97,7 @@ jobs:
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
@@ -118,13 +119,13 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
|
||||
19
.github/workflows/integer_gpu_full_benchmark.yml
vendored
19
.github/workflows/integer_gpu_full_benchmark.yml
vendored
@@ -19,11 +19,21 @@ on:
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
# This input is not used in this workflow but is still mandatory since a calling workflow could
|
||||
# use it. If a triggering command includes a user_inputs field, then the triggered workflow
|
||||
# must include this very input, otherwise the workflow won't be called.
|
||||
# See start_full_benchmarks.yml as example.
|
||||
user_inputs:
|
||||
description: "Type of benchmarks to run"
|
||||
type: string
|
||||
default: "weekly_benchmarks"
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
integer-benchmarks:
|
||||
@@ -53,7 +63,7 @@ jobs:
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -69,10 +79,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
@@ -92,7 +101,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -117,7 +126,7 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -25,6 +25,8 @@ env:
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-integer-benchmarks:
|
||||
@@ -54,10 +56,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Run multi-bit benchmarks with AVX512
|
||||
run: |
|
||||
@@ -69,7 +70,7 @@ jobs:
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
@@ -90,7 +91,7 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -25,6 +25,8 @@ env:
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-integer-benchmarks:
|
||||
@@ -55,7 +57,7 @@ jobs:
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -65,10 +67,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
@@ -97,7 +98,7 @@ jobs:
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
@@ -119,13 +120,13 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
|
||||
5
.github/workflows/m1_tests.yml
vendored
5
.github/workflows/m1_tests.yml
vendored
@@ -14,6 +14,8 @@ on:
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
FAST_TESTS: "TRUE"
|
||||
|
||||
@@ -30,10 +32,9 @@ jobs:
|
||||
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
|
||||
- name: Install latest stable
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: stable
|
||||
default: true
|
||||
|
||||
- name: Run pcc checks
|
||||
run: |
|
||||
|
||||
7
.github/workflows/pbs_benchmark.yml
vendored
7
.github/workflows/pbs_benchmark.yml
vendored
@@ -32,6 +32,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-pbs-benchmarks:
|
||||
@@ -61,10 +63,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
@@ -86,7 +87,7 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_pbs
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
142
.github/workflows/pbs_gpu_benchmark.yml
vendored
Normal file
142
.github/workflows/pbs_gpu_benchmark.yml
vendored
Normal file
@@ -0,0 +1,142 @@
|
||||
# Run PBS benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: PBS GPU benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
# This input is not used in this workflow but is still mandatory since a calling workflow could
|
||||
# use it. If a triggering command includes a user_inputs field, then the triggered workflow
|
||||
# must include this very input, otherwise the workflow won't be called.
|
||||
# See start_full_benchmarks.yml as example.
|
||||
user_inputs:
|
||||
description: "Type of benchmarks to run"
|
||||
type: string
|
||||
default: "weekly_benchmarks"
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
|
||||
jobs:
|
||||
run-pbs-benchmarks:
|
||||
name: Execute PBS benchmarks in EC2
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Get benchmark date
|
||||
run: |
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up home
|
||||
# The "Install rust" step requires the root user to have a HOME directory, which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make AVX512_SUPPORT=ON bench_pbs_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--backend gpu \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--name-suffix avx512 \
|
||||
--walk-subdirs \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_pbs
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on downloaded artifact"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "PBS GPU benchmarks failed. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
7
.github/workflows/shortint_benchmark.yml
vendored
7
.github/workflows/shortint_benchmark.yml
vendored
@@ -24,6 +24,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-shortint-benchmarks:
|
||||
@@ -53,10 +55,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
@@ -88,7 +89,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_shortint
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -32,6 +32,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
shortint-benchmarks:
|
||||
@@ -67,10 +69,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
@@ -112,7 +113,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -25,6 +25,8 @@ env:
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-integer-benchmarks:
|
||||
@@ -54,10 +56,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
@@ -69,7 +70,7 @@ jobs:
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
@@ -90,7 +91,7 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -28,6 +28,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
integer-benchmarks:
|
||||
@@ -66,10 +68,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
@@ -96,7 +97,7 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -25,6 +25,8 @@ env:
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-integer-benchmarks:
|
||||
@@ -54,10 +56,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Run multi-bit benchmarks with AVX512
|
||||
run: |
|
||||
@@ -69,7 +70,7 @@ jobs:
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
@@ -90,7 +91,7 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
8
.github/workflows/start_benchmarks.yml
vendored
8
.github/workflows/start_benchmarks.yml
vendored
@@ -36,6 +36,10 @@ on:
|
||||
description: "Run PBS benches"
|
||||
type: boolean
|
||||
default: true
|
||||
pbs_gpu_bench:
|
||||
description: "Run PBS benches on GPU"
|
||||
type: boolean
|
||||
default: true
|
||||
wasm_client_bench:
|
||||
description: "Run WASM client benches"
|
||||
type: boolean
|
||||
@@ -50,7 +54,7 @@ jobs:
|
||||
integer_bench, integer_multi_bit_bench,
|
||||
signed_integer_bench, signed_integer_multi_bit_bench,
|
||||
integer_gpu_bench, integer_multi_bit_gpu_bench,
|
||||
pbs_bench, wasm_client_bench ]
|
||||
pbs_bench, pbs_gpu_bench, wasm_client_bench ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
@@ -60,7 +64,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@62f4729b5df35e6e0e01265fa70a82ccaf196b4b
|
||||
uses: tj-actions/changed-files@90a06d6ba9543371ab4df8eeca0be07ca6054959
|
||||
with:
|
||||
files_yaml: |
|
||||
common_benches:
|
||||
|
||||
5
.github/workflows/start_full_benchmarks.yml
vendored
5
.github/workflows/start_full_benchmarks.yml
vendored
@@ -24,8 +24,9 @@ jobs:
|
||||
if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' }}
|
||||
strategy:
|
||||
matrix:
|
||||
command: [ boolean_bench, shortint_full_bench, integer_full_bench, signed_integer_full_bench, integer_gpu_full_bench,
|
||||
pbs_bench, wasm_client_bench ]
|
||||
command: [ boolean_bench, shortint_full_bench,
|
||||
integer_full_bench, signed_integer_full_bench, integer_gpu_full_bench,
|
||||
pbs_bench, pbs_gpu_bench, wasm_client_bench ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
|
||||
2
.github/workflows/sync_on_push.yml
vendored
2
.github/workflows/sync_on_push.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Save repo
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: repo-archive
|
||||
path: '.'
|
||||
|
||||
7
.github/workflows/wasm_client_benchmark.yml
vendored
7
.github/workflows/wasm_client_benchmark.yml
vendored
@@ -32,6 +32,8 @@ env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-wasm-client-benchmarks:
|
||||
@@ -61,10 +63,9 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
@@ -97,7 +98,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
|
||||
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
|
||||
with:
|
||||
name: ${{ github.sha }}_wasm
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
104
Makefile
104
Makefile
@@ -17,6 +17,7 @@ FAST_TESTS?=FALSE
|
||||
FAST_BENCH?=FALSE
|
||||
BENCH_OP_FLAVOR?=DEFAULT
|
||||
NODE_VERSION=20
|
||||
FORWARD_COMPAT?=OFF
|
||||
# sed: -n, do not print input stream, -e means a script/expression
|
||||
# 1,/version/ indicates from the first line, to the line matching version at the start of the line
|
||||
# p indicates to print, so we keep only the start of the Cargo.toml until we hit the first version
|
||||
@@ -49,12 +50,18 @@ else
|
||||
COVERAGE_ONLY=
|
||||
endif
|
||||
|
||||
ifeq ($(FORWARD_COMPAT),ON)
|
||||
FORWARD_COMPAT_FEATURE=forward_compatibility
|
||||
else
|
||||
FORWARD_COMPAT_FEATURE=
|
||||
endif
|
||||
|
||||
# Variables used only for regex_engine example
|
||||
REGEX_STRING?=''
|
||||
REGEX_PATTERN?=''
|
||||
|
||||
# tfhe-cuda-backend
|
||||
TFHECUDA_SRC="backends/tfhe-cuda-backend/implementation"
|
||||
TFHECUDA_SRC="backends/tfhe-cuda-backend/cuda"
|
||||
TFHECUDA_BUILD=$(TFHECUDA_SRC)/build
|
||||
|
||||
# Exclude these files from coverage reports
|
||||
@@ -154,7 +161,7 @@ check_fmt: install_rs_check_toolchain
|
||||
clippy_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,shortint,gpu \
|
||||
-p tfhe -- --no-deps -D warnings
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: fix_newline # Fix newline at end of file issues to be UNIX compliant
|
||||
fix_newline: check_linelint_installed
|
||||
@@ -172,6 +179,12 @@ clippy_core: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),nightly-avx512 \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_boolean # Run clippy lints enabling the boolean features
|
||||
clippy_boolean: install_rs_check_toolchain
|
||||
@@ -220,11 +233,17 @@ clippy_trivium: install_rs_check_toolchain
|
||||
-p tfhe-trivium -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
|
||||
clippy_all_targets:
|
||||
clippy_all_targets: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_all_targets_forward_compatibility # Run clippy lints on all targets (benches, examples, etc.) with the forward_compatibility feature enabled
|
||||
clippy_all_targets_forward_compatibility: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,forward_compatibility \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_concrete_csprng # Run clippy lints on concrete-csprng
|
||||
clippy_concrete_csprng:
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
@@ -233,7 +252,8 @@ clippy_concrete_csprng:
|
||||
|
||||
.PHONY: clippy_all # Run all clippy targets
|
||||
clippy_all: clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets clippy_c_api \
|
||||
clippy_js_wasm_api clippy_tasks clippy_core clippy_concrete_csprng clippy_trivium
|
||||
clippy_js_wasm_api clippy_tasks clippy_core clippy_concrete_csprng clippy_trivium \
|
||||
clippy_all_targets_forward_compatibility
|
||||
|
||||
.PHONY: clippy_fast # Run main clippy targets
|
||||
clippy_fast: clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core \
|
||||
@@ -286,15 +306,23 @@ symlink_c_libs_without_fingerprint:
|
||||
.PHONY: build_c_api # Build the C API for boolean, shortint and integer
|
||||
build_c_api: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,$(FORWARD_COMPAT_FEATURE) \
|
||||
-p $(TFHE_SPEC)
|
||||
@"$(MAKE)" symlink_c_libs_without_fingerprint
|
||||
|
||||
.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer with the gpu feature enabled
|
||||
build_c_api_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,gpu \
|
||||
-p $(TFHE_SPEC)
|
||||
@"$(MAKE)" symlink_c_libs_without_fingerprint
|
||||
|
||||
.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
|
||||
build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,experimental-force_fft_algo_dif4 \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,experimental-force_fft_algo_dif4,$(FORWARD_COMPAT_FEATURE) \
|
||||
-p $(TFHE_SPEC)
|
||||
@"$(MAKE)" symlink_c_libs_without_fingerprint
|
||||
|
||||
.PHONY: build_web_js_api # Build the js API targeting the web browser
|
||||
build_web_js_api: install_rs_build_toolchain install_wasm_pack
|
||||
@@ -354,16 +382,16 @@ test_gpu: test_core_crypto_gpu test_integer_gpu
|
||||
.PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
|
||||
test_core_crypto_gpu: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p tfhe -- core_crypto::gpu::
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p tfhe -- core_crypto::gpu::
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
|
||||
|
||||
.PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
|
||||
test_integer_gpu: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p tfhe -- integer::gpu::server_key::
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p tfhe -- integer::gpu::server_key::
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
|
||||
|
||||
.PHONY: test_boolean # Run the tests of the boolean module
|
||||
test_boolean: install_rs_build_toolchain
|
||||
@@ -387,24 +415,28 @@ test_c_api_rs: install_rs_check_toolchain
|
||||
|
||||
.PHONY: test_c_api_c # Run the C tests for the C API
|
||||
test_c_api_c: build_c_api
|
||||
./scripts/c_api_tests.sh
|
||||
./scripts/c_api_tests.sh --forward-compat "$(FORWARD_COMPAT)"
|
||||
|
||||
.PHONY: test_c_api # Run all the tests for the C API
|
||||
test_c_api: test_c_api_rs test_c_api_c
|
||||
|
||||
.PHONY: test_c_api_gpu # Run the C tests for the C API built with the gpu feature
|
||||
test_c_api_gpu: build_c_api_gpu
|
||||
./scripts/c_api_tests.sh --gpu
|
||||
|
||||
.PHONY: test_shortint_ci # Run the tests for shortint ci
|
||||
test_shortint_ci: install_rs_build_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
./scripts/shortint-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)"
|
||||
--cargo-profile "$(CARGO_PROFILE)" --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_shortint_multi_bit_ci # Run the tests for shortint ci running only multibit tests
|
||||
test_shortint_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
./scripts/shortint-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_shortint # Run all the tests for shortint
|
||||
test_shortint: install_rs_build_toolchain
|
||||
@@ -424,7 +456,8 @@ test_integer_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)"
|
||||
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_unsigned_integer_ci # Run the tests for unsigned integer ci
|
||||
test_unsigned_integer_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
@@ -432,7 +465,7 @@ test_unsigned_integer_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--unsigned-only
|
||||
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_signed_integer_ci # Run the tests for signed integer ci
|
||||
test_signed_integer_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
@@ -440,14 +473,15 @@ test_signed_integer_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--signed-only
|
||||
--signed-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_integer_multi_bit_ci # Run the tests for integer ci running only multibit tests
|
||||
test_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)"
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_unsigned_integer_multi_bit_ci # Run the tests for unsigned integer ci running only multibit tests
|
||||
test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
@@ -455,7 +489,7 @@ test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nex
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--unsigned-only
|
||||
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_signed_integer_multi_bit_ci # Run the tests for signed integer ci running only multibit tests
|
||||
test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
@@ -463,7 +497,7 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--signed-only
|
||||
--signed-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_safe_deserialization # Run the tests for safe deserialization
|
||||
test_safe_deserialization: install_rs_build_toolchain install_cargo_nextest
|
||||
@@ -481,12 +515,24 @@ test_high_level_api: install_rs_build_toolchain
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) \
|
||||
-- high_level_api::
|
||||
|
||||
.PHONY: test_forward_compatibility # Run forward compatibility tests
|
||||
test_forward_compatibility: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --tests --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,forward_compatibility,internal-keycache -p $(TFHE_SPEC) \
|
||||
-- forward_compatibility::
|
||||
|
||||
.PHONY: test_user_doc # Run tests from the .md documentation
|
||||
test_user_doc: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
|
||||
.PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
|
||||
test_user_doc_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu -p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
|
||||
.PHONY: test_regex_engine # Run tests for regex_engine example
|
||||
test_regex_engine: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
@@ -530,7 +576,7 @@ docs: doc
|
||||
lint_doc: install_rs_check_toolchain
|
||||
RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p tfhe --no-deps
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p $(TFHE_SPEC) --no-deps
|
||||
|
||||
.PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
|
||||
lint_docs: lint_doc
|
||||
@@ -552,7 +598,9 @@ check_compile_tests:
|
||||
|
||||
@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
|
||||
"$(MAKE)" build_c_api && \
|
||||
./scripts/c_api_tests.sh --build-only; \
|
||||
./scripts/c_api_tests.sh --build-only --forward-compat "$(FORWARD_COMPAT)" && \
|
||||
FORWARD_COMPAT=ON "$(MAKE)" build_c_api && \
|
||||
./scripts/c_api_tests.sh --build-only --forward-compat "$(FORWARD_COMPAT)"; \
|
||||
fi
|
||||
|
||||
.PHONY: build_nodejs_test_docker # Build a docker image with tools to run nodejs tests for wasm API
|
||||
@@ -621,7 +669,7 @@ bench_integer_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,$(AVX512_FEATURE) -p tfhe --
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
|
||||
bench_integer_multi_bit: install_rs_check_toolchain
|
||||
@@ -645,7 +693,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,$(AVX512_FEATURE) -p tfhe --
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_shortint # Run benchmarks for shortint
|
||||
bench_shortint: install_rs_check_toolchain
|
||||
@@ -659,11 +707,11 @@ bench_oprf: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench oprf-shortint-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,$(AVX512_FEATURE) -p tfhe
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench oprf-integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p tfhe
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)
|
||||
|
||||
|
||||
|
||||
@@ -688,6 +736,12 @@ bench_pbs: install_rs_check_toolchain
|
||||
--bench pbs-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
|
||||
bench_pbs_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_web_js_api_parallel # Run benchmarks for the web wasm api
|
||||
bench_web_js_api_parallel: build_web_js_api_parallel
|
||||
$(MAKE) -C tfhe/web_wasm_parallel_tests bench
|
||||
|
||||
10
README.md
10
README.md
@@ -4,13 +4,17 @@
|
||||
</p>
|
||||
<hr/>
|
||||
<p align="center">
|
||||
<a href="https://docs.zama.ai/tfhe-rs"> 📒 Read documentation</a> | <a href="https://zama.ai/community"> 💛 Community support</a>
|
||||
<a href="https://docs.zama.ai/tfhe-rs"> 📒 Read documentation</a> | <a href="https://zama.ai/community"> 💛 Community support</a> | <a href="https://github.com/zama-ai/awesome-zama"> 📚 FHE resources</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<!-- Version badge using shields.io -->
|
||||
<a href="https://github.com/zama-ai/tfhe-rs/releases">
|
||||
<img src="https://img.shields.io/github/v/release/zama-ai/tfhe-rs?style=flat-square">
|
||||
</a>
|
||||
<!-- Link to tutorials badge using shields.io -->
|
||||
<a href="#license">
|
||||
<img src="https://img.shields.io/badge/License-BSD--3--Clause--Clear-orange?style=flat-square">
|
||||
</a>
|
||||
<!-- Zama Bounty Program -->
|
||||
<a href="https://github.com/zama-ai/bounty-program">
|
||||
<img src="https://img.shields.io/badge/Contribute-Zama%20Bounty%20Program-yellow?style=flat-square">
|
||||
@@ -138,9 +142,11 @@ libraries.
|
||||
|
||||
## Need support?
|
||||
<a target="_blank" href="https://community.zama.ai">
|
||||
<img src="https://user-images.githubusercontent.com/5758427/231115030-21195b55-2629-4c01-9809-be5059243999.png">
|
||||
<img src="https://github.com/zama-ai/tfhe-rs/assets/157474013/33d856dc-f25d-454b-a010-af12bff2aa7d">
|
||||
</a>
|
||||
|
||||
|
||||
|
||||
## Citing TFHE-rs
|
||||
|
||||
To cite TFHE-rs in academic papers, please use the following entry:
|
||||
|
||||
@@ -146,7 +146,7 @@ use tfhe::prelude::*;
|
||||
use tfhe_trivium::TriviumStreamShortint;
|
||||
|
||||
fn test_shortint() {
|
||||
let config = ConfigBuilder::all_disabled().enable_default_integers().build();
|
||||
let config = ConfigBuilder::default().build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
|
||||
let ksk = CastingKey::new((&client_key, &server_key), (&hl_client_key, &hl_server_key));
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tfhe-cuda-backend"
|
||||
version = "0.1.2"
|
||||
version = "0.1.3"
|
||||
edition = "2021"
|
||||
authors = ["Zama team"]
|
||||
license = "BSD-3-Clause-Clear"
|
||||
|
||||
@@ -34,13 +34,14 @@ The Cuda project held in `tfhe-cuda-backend` can be compiled independently from
|
||||
following way:
|
||||
```
|
||||
git clone git@github.com:zama-ai/tfhe-rs
|
||||
cd backends/tfhe-cuda-backend/implementation
|
||||
cd backends/tfhe-cuda-backend/cuda
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
```
|
||||
The compute capability is detected automatically (with the first GPU information) and set accordingly.
|
||||
If your machine does not have an available Nvidia GPU, the compilation will work if you have the nvcc compiler installed. The generated executable will target a 7.0 compute capability (sm_70).
|
||||
|
||||
## Links
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
cmake_minimum_required(VERSION 3.24 FATAL_ERROR)
|
||||
project(tfhe_cuda_backend LANGUAGES CXX CUDA)
|
||||
project(tfhe_cuda_backend LANGUAGES CXX)
|
||||
|
||||
# See if the minimum CUDA version is available. If not, only enable documentation building.
|
||||
set(MINIMUM_SUPPORTED_CUDA_VERSION 10.0)
|
||||
@@ -56,9 +56,10 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler ${OpenMP_CXX_FLAGS}")
|
||||
set(CMAKE_CUDA_ARCHITECTURES native)
|
||||
if(NOT CUDA_NVCC_FLAGS)
|
||||
set(CUDA_NVCC_FLAGS -arch=sm_70)
|
||||
if(${CUDA_SUCCESS})
|
||||
set(CMAKE_CUDA_ARCHITECTURES native)
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES 70)
|
||||
endif()
|
||||
|
||||
# in production, should use -arch=sm_70 --ptxas-options=-v to see register spills -lineinfo for better debugging
|
||||
|
||||
@@ -40,7 +40,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
|
||||
void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
|
||||
@@ -48,7 +48,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
|
||||
void cleanup_cuda_bootstrap_amortized(cuda_stream_t *stream,
|
||||
int8_t **pbs_buffer);
|
||||
@@ -71,7 +71,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
|
||||
void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
|
||||
@@ -79,7 +79,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
|
||||
void cleanup_cuda_bootstrap_low_latency(cuda_stream_t *stream,
|
||||
int8_t **pbs_buffer);
|
||||
|
||||
@@ -15,7 +15,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t grouping_factor, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t num_lut_vectors, uint32_t lwe_idx,
|
||||
uint32_t num_samples, uint32_t num_luts, uint32_t lwe_idx,
|
||||
uint32_t max_shared_memory, uint32_t chunk_size = 0);
|
||||
|
||||
void scratch_cuda_multi_bit_pbs_64(
|
||||
|
||||
@@ -32,34 +32,6 @@ enum COMPARISON_TYPE {
|
||||
};
|
||||
enum IS_RELATIONSHIP { IS_INFERIOR = 0, IS_EQUAL = 1, IS_SUPERIOR = 2 };
|
||||
|
||||
/*
|
||||
* generate bivariate accumulator for device pointer
|
||||
* v_stream - cuda stream
|
||||
* acc - device pointer for bivariate accumulator
|
||||
* ...
|
||||
* f - wrapping function with two Torus inputs
|
||||
*/
|
||||
template <typename Torus>
|
||||
void generate_device_accumulator_bivariate(
|
||||
cuda_stream_t *stream, Torus *acc_bivariate, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
std::function<Torus(Torus, Torus)> f);
|
||||
|
||||
/*
|
||||
* generate univariate accumulator for device pointer
|
||||
* v_stream - cuda stream
|
||||
* acc - device pointer for univariate accumulator
|
||||
* ...
|
||||
* f - evaluating function with one Torus input
|
||||
*/
|
||||
template <typename Torus>
|
||||
void generate_device_accumulator(cuda_stream_t *stream, Torus *acc,
|
||||
uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t message_modulus,
|
||||
uint32_t carry_modulus,
|
||||
std::function<Torus(Torus)> f);
|
||||
|
||||
extern "C" {
|
||||
void scratch_cuda_full_propagation_64(
|
||||
cuda_stream_t *stream, int8_t **mem_ptr, uint32_t lwe_dimension,
|
||||
@@ -226,6 +198,34 @@ void cleanup_cuda_propagate_single_carry_low_latency(cuda_stream_t *stream,
|
||||
int8_t **mem_ptr_void);
|
||||
}
|
||||
|
||||
/*
|
||||
* generate bivariate accumulator (lut) for device pointer
|
||||
* stream - cuda stream
|
||||
* acc_bivariate - device pointer for bivariate accumulator
|
||||
* ...
|
||||
* f - wrapping function with two Torus inputs
|
||||
*/
|
||||
template <typename Torus>
|
||||
void generate_device_accumulator_bivariate(
|
||||
cuda_stream_t *stream, Torus *acc_bivariate, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
std::function<Torus(Torus, Torus)> f);
|
||||
|
||||
/*
|
||||
* generate univariate accumulator (lut) for device pointer
|
||||
* stream - cuda stream
|
||||
* acc - device pointer for univariate accumulator
|
||||
* ...
|
||||
* f - evaluating function with one Torus input
|
||||
*/
|
||||
template <typename Torus>
|
||||
void generate_device_accumulator(cuda_stream_t *stream, Torus *acc,
|
||||
uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t message_modulus,
|
||||
uint32_t carry_modulus,
|
||||
std::function<Torus(Torus)> f);
|
||||
|
||||
struct int_radix_params {
|
||||
PBS_TYPE pbs_type;
|
||||
uint32_t glwe_dimension;
|
||||
@@ -326,7 +326,7 @@ template <typename Torus> struct int_radix_lut {
|
||||
if (allocate_gpu_memory) {
|
||||
// Allocate LUT
|
||||
// LUT is used as a trivial encryption and must be initialized outside
|
||||
// this contructor
|
||||
// this constructor
|
||||
lut = (Torus *)cuda_malloc_async(num_luts * lut_buffer_size, stream);
|
||||
|
||||
lut_indexes = (Torus *)cuda_malloc_async(lut_indexes_size, stream);
|
||||
@@ -408,7 +408,7 @@ template <typename Torus> struct int_radix_lut {
|
||||
return &lut[ind * (params.glwe_dimension + 1) * params.polynomial_size];
|
||||
}
|
||||
|
||||
Torus *get_tvi(size_t ind) { return &lut_indexes[ind]; }
|
||||
Torus *get_lut_indexes(size_t ind) { return &lut_indexes[ind]; }
|
||||
void release(cuda_stream_t *stream) {
|
||||
cuda_drop_async(lut_indexes, stream);
|
||||
cuda_drop_async(lwe_indexes, stream);
|
||||
@@ -437,10 +437,10 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
Torus *generates_or_propagates;
|
||||
Torus *step_output;
|
||||
|
||||
// test_vector_array[2] = {lut_does_block_generate_carry,
|
||||
// luts_array[2] = {lut_does_block_generate_carry,
|
||||
// lut_does_block_generate_or_propagate}
|
||||
int_radix_lut<Torus> *test_vector_array;
|
||||
int_radix_lut<Torus> *lut_carry_propagation_sum;
|
||||
int_radix_lut<Torus> *luts_array;
|
||||
int_radix_lut<Torus> *luts_carry_propagation_sum;
|
||||
int_radix_lut<Torus> *message_acc;
|
||||
|
||||
int_radix_params params;
|
||||
@@ -461,7 +461,7 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
step_output = (Torus *)cuda_malloc_async(
|
||||
num_radix_blocks * big_lwe_size_bytes, stream);
|
||||
|
||||
// declare functions for test vector generation
|
||||
// declare functions for lut generation
|
||||
auto f_lut_does_block_generate_carry = [message_modulus](Torus x) -> Torus {
|
||||
if (x >= message_modulus)
|
||||
return OUTPUT_CARRY::GENERATED;
|
||||
@@ -477,7 +477,7 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
return OUTPUT_CARRY::NONE;
|
||||
};
|
||||
|
||||
auto f_lut_carry_propagation_sum = [](Torus msb, Torus lsb) -> Torus {
|
||||
auto f_luts_carry_propagation_sum = [](Torus msb, Torus lsb) -> Torus {
|
||||
if (msb == OUTPUT_CARRY::PROPAGATED)
|
||||
return lsb;
|
||||
return msb;
|
||||
@@ -487,18 +487,18 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
return x % message_modulus;
|
||||
};
|
||||
|
||||
// create test vector objects
|
||||
test_vector_array = new int_radix_lut<Torus>(
|
||||
// create lut objects
|
||||
luts_array = new int_radix_lut<Torus>(
|
||||
stream, params, 2, num_radix_blocks, allocate_gpu_memory);
|
||||
lut_carry_propagation_sum = new struct int_radix_lut<Torus>(
|
||||
luts_carry_propagation_sum = new struct int_radix_lut<Torus>(
|
||||
stream, params, 1, num_radix_blocks, allocate_gpu_memory);
|
||||
message_acc = new struct int_radix_lut<Torus>(
|
||||
stream, params, 1, num_radix_blocks, allocate_gpu_memory);
|
||||
|
||||
auto lut_does_block_generate_carry = test_vector_array->get_lut(0);
|
||||
auto lut_does_block_generate_or_propagate = test_vector_array->get_lut(1);
|
||||
auto lut_does_block_generate_carry = luts_array->get_lut(0);
|
||||
auto lut_does_block_generate_or_propagate = luts_array->get_lut(1);
|
||||
|
||||
// generate test vectors
|
||||
// generate luts (aka accumulators)
|
||||
generate_device_accumulator<Torus>(
|
||||
stream, lut_does_block_generate_carry, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, f_lut_does_block_generate_carry);
|
||||
@@ -507,12 +507,12 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
f_lut_does_block_generate_or_propagate);
|
||||
cuda_set_value_async<Torus>(&(stream->stream),
|
||||
test_vector_array->get_tvi(1), 1,
|
||||
luts_array->get_lut_indexes(1), 1,
|
||||
num_radix_blocks - 1);
|
||||
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
stream, lut_carry_propagation_sum->lut, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, f_lut_carry_propagation_sum);
|
||||
stream, luts_carry_propagation_sum->lut, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, f_luts_carry_propagation_sum);
|
||||
|
||||
generate_device_accumulator<Torus>(stream, message_acc->lut, glwe_dimension,
|
||||
polynomial_size, message_modulus,
|
||||
@@ -523,12 +523,12 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
cuda_drop_async(generates_or_propagates, stream);
|
||||
cuda_drop_async(step_output, stream);
|
||||
|
||||
test_vector_array->release(stream);
|
||||
lut_carry_propagation_sum->release(stream);
|
||||
luts_array->release(stream);
|
||||
luts_carry_propagation_sum->release(stream);
|
||||
message_acc->release(stream);
|
||||
|
||||
delete test_vector_array;
|
||||
delete lut_carry_propagation_sum;
|
||||
delete luts_array;
|
||||
delete luts_carry_propagation_sum;
|
||||
delete message_acc;
|
||||
}
|
||||
};
|
||||
@@ -538,9 +538,9 @@ template <typename Torus> struct int_mul_memory {
|
||||
Torus *block_mul_res;
|
||||
Torus *small_lwe_vector;
|
||||
Torus *lwe_pbs_out_array;
|
||||
int_radix_lut<Torus> *test_vector_array; // lsb msb
|
||||
int_radix_lut<Torus> *test_vector_message;
|
||||
int_radix_lut<Torus> *test_vector_carry;
|
||||
int_radix_lut<Torus> *luts_array; // lsb msb
|
||||
int_radix_lut<Torus> *luts_message;
|
||||
int_radix_lut<Torus> *luts_carry;
|
||||
int_sc_prop_memory<Torus> *scp_mem;
|
||||
int_radix_params params;
|
||||
|
||||
@@ -583,18 +583,18 @@ template <typename Torus> struct int_mul_memory {
|
||||
stream);
|
||||
|
||||
// create int_radix_lut objects for lsb, msb, message, carry
|
||||
// test_vector_array -> lut = {lsb_acc, msb_acc}
|
||||
test_vector_array = new int_radix_lut<Torus>(
|
||||
// luts_array -> lut = {lsb_acc, msb_acc}
|
||||
luts_array = new int_radix_lut<Torus>(
|
||||
stream, params, 2, total_block_count, allocate_gpu_memory);
|
||||
test_vector_message = new int_radix_lut<Torus>(
|
||||
stream, params, 1, total_block_count, test_vector_array);
|
||||
test_vector_carry = new int_radix_lut<Torus>(
|
||||
stream, params, 1, total_block_count, test_vector_array);
|
||||
luts_message = new int_radix_lut<Torus>(
|
||||
stream, params, 1, total_block_count, luts_array);
|
||||
luts_carry = new int_radix_lut<Torus>(
|
||||
stream, params, 1, total_block_count, luts_array);
|
||||
|
||||
auto lsb_acc = test_vector_array->get_lut(0);
|
||||
auto msb_acc = test_vector_array->get_lut(1);
|
||||
auto message_acc = test_vector_message->get_lut(0);
|
||||
auto carry_acc = test_vector_carry->get_lut(0);
|
||||
auto lsb_acc = luts_array->get_lut(0);
|
||||
auto msb_acc = luts_array->get_lut(1);
|
||||
auto message_acc = luts_message->get_lut(0);
|
||||
auto carry_acc = luts_carry->get_lut(0);
|
||||
|
||||
// define functions for each accumulator
|
||||
auto lut_f_lsb = [message_modulus](Torus x, Torus y) -> Torus {
|
||||
@@ -624,12 +624,12 @@ template <typename Torus> struct int_mul_memory {
|
||||
stream, msb_acc, glwe_dimension, polynomial_size, message_modulus,
|
||||
carry_modulus, lut_f_msb);
|
||||
|
||||
// tvi for test_vector_array should be reinitialized
|
||||
// lut_indexes for luts_array should be reinitialized
|
||||
// the first lsb_vector_block_count values should refer to lsb_acc
|
||||
// the last msb_vector_block_count values should refer to msb_acc
|
||||
// for message and carry default tvi is fine
|
||||
// for message and carry default lut_indexes is fine
|
||||
cuda_set_value_async<Torus>(
|
||||
&(stream->stream), test_vector_array->get_tvi(lsb_vector_block_count),
|
||||
&(stream->stream), luts_array->get_lut_indexes(lsb_vector_block_count),
|
||||
1, msb_vector_block_count);
|
||||
}
|
||||
|
||||
@@ -639,15 +639,15 @@ template <typename Torus> struct int_mul_memory {
|
||||
cuda_drop_async(small_lwe_vector, stream);
|
||||
cuda_drop_async(lwe_pbs_out_array, stream);
|
||||
|
||||
test_vector_array->release(stream);
|
||||
test_vector_message->release(stream);
|
||||
test_vector_carry->release(stream);
|
||||
luts_array->release(stream);
|
||||
luts_message->release(stream);
|
||||
luts_carry->release(stream);
|
||||
|
||||
scp_mem->release(stream);
|
||||
|
||||
delete test_vector_array;
|
||||
delete test_vector_message;
|
||||
delete test_vector_carry;
|
||||
delete luts_array;
|
||||
delete luts_message;
|
||||
delete luts_carry;
|
||||
|
||||
delete scp_mem;
|
||||
}
|
||||
@@ -681,12 +681,12 @@ template <typename Torus> struct int_shift_buffer {
|
||||
// LUT
|
||||
// pregenerate lut vector and indexes
|
||||
// lut for left shift
|
||||
// here we generate 'num_bits_in_block' times test_vector
|
||||
// here we generate 'num_bits_in_block' times lut
|
||||
// one for each 'shift_within_block' = 'shift' % 'num_bits_in_block'
|
||||
// even though test_vector_left contains 'num_bits_in_block' lut
|
||||
// tvi will have indexes for single lut only and those indexes will be 0
|
||||
// even though lut_left contains 'num_bits_in_block' lut
|
||||
// lut_indexes will have indexes for single lut only and those indexes will be 0
|
||||
// it means that for pbs the corresponding lut should be selected and passed along
|
||||
// tvi filled with zeros
|
||||
// lut_indexes filled with zeros
|
||||
|
||||
// calculate bivariate lut for each 'shift_within_block'
|
||||
for (int s_w_b = 1; s_w_b < num_bits_in_block; s_w_b++) {
|
||||
@@ -738,11 +738,11 @@ template <typename Torus> struct int_shift_buffer {
|
||||
lut_buffers_bivariate.push_back(cur_lut_bivariate);
|
||||
}
|
||||
|
||||
// here we generate 'message_modulus' times test_vector
|
||||
// here we generate 'message_modulus' times lut
|
||||
// one for each 'shift'
|
||||
// tvi will have indexes for single lut only and those indexes will be 0
|
||||
// lut_indexes will have indexes for single lut only and those indexes will be 0
|
||||
// it means that for pbs the corresponding lut should be selected and passed along
|
||||
// tvi filled with zeros
|
||||
// lut_indexes filled with zeros
|
||||
|
||||
// calculate lut for each 'shift'
|
||||
for (int shift = 0; shift < params.message_modulus; shift++) {
|
||||
|
||||
@@ -13,10 +13,6 @@ set(SOURCES
|
||||
${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/vertical_packing.h)
|
||||
file(GLOB_RECURSE SOURCES "*.cu")
|
||||
add_library(tfhe_cuda_backend STATIC ${SOURCES})
|
||||
set_target_properties(
|
||||
tfhe_cuda_backend
|
||||
PROPERTIES CUDA_SEPARABLE_COMPILATION ON
|
||||
CUDA_RESOLVE_DEVICE_SYMBOLS ON
|
||||
CUDA_ARCHITECTURES native)
|
||||
set_target_properties(tfhe_cuda_backend PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON)
|
||||
target_link_libraries(tfhe_cuda_backend PUBLIC cudart OpenMP::OpenMP_CXX)
|
||||
target_include_directories(tfhe_cuda_backend PRIVATE .)
|
||||
|
||||
@@ -22,7 +22,7 @@ void execute_pbs(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count,
|
||||
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t num_lut_vectors, uint32_t lwe_idx,
|
||||
uint32_t num_luts, uint32_t lwe_idx,
|
||||
uint32_t max_shared_memory, PBS_TYPE pbs_type) {
|
||||
if (sizeof(Torus) == sizeof(uint32_t)) {
|
||||
// 32 bits
|
||||
@@ -37,7 +37,7 @@ void execute_pbs(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
lut_vector_indexes, lwe_array_in, lwe_input_indexes,
|
||||
bootstrapping_key, pbs_buffer, lwe_dimension, glwe_dimension,
|
||||
polynomial_size, base_log, level_count, input_lwe_ciphertext_count,
|
||||
num_lut_vectors, lwe_idx, max_shared_memory);
|
||||
num_luts, lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case AMORTIZED:
|
||||
cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
@@ -45,7 +45,7 @@ void execute_pbs(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
lut_vector_indexes, lwe_array_in, lwe_input_indexes,
|
||||
bootstrapping_key, pbs_buffer, lwe_dimension, glwe_dimension,
|
||||
polynomial_size, base_log, level_count, input_lwe_ciphertext_count,
|
||||
num_lut_vectors, lwe_idx, max_shared_memory);
|
||||
num_luts, lwe_idx, max_shared_memory);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -59,7 +59,7 @@ void execute_pbs(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
lut_vector_indexes, lwe_array_in, lwe_input_indexes,
|
||||
bootstrapping_key, pbs_buffer, lwe_dimension, glwe_dimension,
|
||||
polynomial_size, grouping_factor, base_log, level_count,
|
||||
input_lwe_ciphertext_count, num_lut_vectors, lwe_idx,
|
||||
input_lwe_ciphertext_count, num_luts, lwe_idx,
|
||||
max_shared_memory);
|
||||
break;
|
||||
case LOW_LAT:
|
||||
@@ -68,7 +68,7 @@ void execute_pbs(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
lut_vector_indexes, lwe_array_in, lwe_input_indexes,
|
||||
bootstrapping_key, pbs_buffer, lwe_dimension, glwe_dimension,
|
||||
polynomial_size, base_log, level_count, input_lwe_ciphertext_count,
|
||||
num_lut_vectors, lwe_idx, max_shared_memory);
|
||||
num_luts, lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case AMORTIZED:
|
||||
cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
@@ -76,7 +76,7 @@ void execute_pbs(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
lut_vector_indexes, lwe_array_in, lwe_input_indexes,
|
||||
bootstrapping_key, pbs_buffer, lwe_dimension, glwe_dimension,
|
||||
polynomial_size, base_log, level_count, input_lwe_ciphertext_count,
|
||||
num_lut_vectors, lwe_idx, max_shared_memory);
|
||||
num_luts, lwe_idx, max_shared_memory);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -303,7 +303,7 @@ void generate_device_accumulator_bivariate(
|
||||
generate_lookup_table_bivariate<Torus>(h_lut, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, f);
|
||||
|
||||
// copy host lut and tvi to device
|
||||
// copy host lut and lut_indexes to device
|
||||
cuda_memcpy_async_to_gpu(
|
||||
acc_bivariate, h_lut,
|
||||
(glwe_dimension + 1) * polynomial_size * sizeof(Torus), stream);
|
||||
@@ -335,7 +335,7 @@ void generate_device_accumulator(cuda_stream_t *stream, Torus *acc,
|
||||
generate_lookup_table<Torus>(h_lut, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, f);
|
||||
|
||||
// copy host lut and tvi to device
|
||||
// copy host lut and lut_indexes to device
|
||||
cuda_memcpy_async_to_gpu(
|
||||
acc, h_lut, (glwe_dimension + 1) * polynomial_size * sizeof(Torus),
|
||||
stream);
|
||||
@@ -370,13 +370,13 @@ void host_propagate_single_carry_low_latency(cuda_stream_t *stream,
|
||||
auto generates_or_propagates = mem->generates_or_propagates;
|
||||
auto step_output = mem->step_output;
|
||||
|
||||
auto test_vector_array = mem->test_vector_array;
|
||||
auto lut_carry_propagation_sum = mem->lut_carry_propagation_sum;
|
||||
auto luts_array = mem->luts_array;
|
||||
auto luts_carry_propagation_sum = mem->luts_carry_propagation_sum;
|
||||
auto message_acc = mem->message_acc;
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
stream, generates_or_propagates, lwe_array, bsk, ksk, num_blocks,
|
||||
test_vector_array);
|
||||
luts_array);
|
||||
|
||||
// compute prefix sum with hillis&steele
|
||||
|
||||
@@ -392,7 +392,7 @@ void host_propagate_single_carry_low_latency(cuda_stream_t *stream,
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
stream, cur_blocks, cur_blocks, prev_blocks, bsk, ksk, cur_total_blocks,
|
||||
lut_carry_propagation_sum);
|
||||
luts_carry_propagation_sum);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(&generates_or_propagates[space * big_lwe_size],
|
||||
cur_blocks,
|
||||
@@ -414,7 +414,7 @@ void host_propagate_single_carry_low_latency(cuda_stream_t *stream,
|
||||
/*
|
||||
* input_blocks: input radix ciphertext propagation will happen inplace
|
||||
* acc_message_carry: list of two luts, [(message_acc), (carry_acc)]
|
||||
* tvi_message_carry: tvi for message and carry, should always be {0, 1}
|
||||
* lut_indexes_message_carry: lut_indexes for message and carry, should always be {0, 1}
|
||||
* small_lwe_vector: output of keyswitch should have
|
||||
* size = 2 * (lwe_dimension + 1) * sizeof(Torus)
|
||||
* big_lwe_vector: output of pbs should have
|
||||
|
||||
@@ -282,21 +282,21 @@ __host__ void host_integer_mult_radix_kb(
|
||||
// glwe_dimension * polynomial_size + 1 coefficients
|
||||
auto lwe_pbs_out_array = mem_ptr->lwe_pbs_out_array;
|
||||
|
||||
// it contains two test vector, first for lsb extraction,
|
||||
// it contains two luts, first for lsb extraction,
|
||||
// second for msb extraction, with total length =
|
||||
// 2 * (glwe_dimension + 1) * polynomial_size
|
||||
auto test_vector_array = mem_ptr->test_vector_array;
|
||||
auto luts_array = mem_ptr->luts_array;
|
||||
|
||||
// accumulator to extract message
|
||||
// with length (glwe_dimension + 1) * polynomial_size
|
||||
auto test_vector_message = mem_ptr->test_vector_message;
|
||||
auto luts_message = mem_ptr->luts_message;
|
||||
|
||||
// accumulator to extract carry
|
||||
// with length (glwe_dimension + 1) * polynomial_size
|
||||
auto test_vector_carry = mem_ptr->test_vector_carry;
|
||||
auto luts_carry = mem_ptr->luts_carry;
|
||||
|
||||
// to be used as default indexing
|
||||
auto lwe_indexes = test_vector_array->lwe_indexes;
|
||||
auto lwe_indexes = luts_array->lwe_indexes;
|
||||
|
||||
auto vector_result_lsb = &vector_result_sb[0];
|
||||
auto vector_result_msb =
|
||||
@@ -316,7 +316,7 @@ __host__ void host_integer_mult_radix_kb(
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
stream, block_mul_res, block_mul_res, vector_result_sb, bsk, ksk,
|
||||
total_block_count, test_vector_array);
|
||||
total_block_count, luts_array);
|
||||
|
||||
vector_result_lsb = &block_mul_res[0];
|
||||
vector_result_msb = &block_mul_res[lsb_vector_block_count *
|
||||
@@ -409,17 +409,17 @@ __host__ void host_integer_mult_radix_kb(
|
||||
mem_ptr->params.ks_base_log, mem_ptr->params.ks_level, total_copied);
|
||||
|
||||
execute_pbs<Torus>(
|
||||
stream, message_blocks_vector, lwe_indexes, test_vector_message->lut,
|
||||
test_vector_message->lut_indexes, small_lwe_vector, lwe_indexes, bsk,
|
||||
test_vector_message->pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
stream, message_blocks_vector, lwe_indexes, luts_message->lut,
|
||||
luts_message->lut_indexes, small_lwe_vector, lwe_indexes, bsk,
|
||||
luts_message->pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, mem_ptr->params.pbs_base_log,
|
||||
mem_ptr->params.pbs_level, mem_ptr->params.grouping_factor,
|
||||
message_count, 1, 0, max_shared_memory, mem_ptr->params.pbs_type);
|
||||
|
||||
execute_pbs<Torus>(stream, carry_blocks_vector, lwe_indexes,
|
||||
test_vector_carry->lut, test_vector_carry->lut_indexes,
|
||||
luts_carry->lut, luts_carry->lut_indexes,
|
||||
&small_lwe_vector[message_count * (lwe_dimension + 1)],
|
||||
lwe_indexes, bsk, test_vector_carry->pbs_buffer,
|
||||
lwe_indexes, bsk, luts_carry->pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size,
|
||||
mem_ptr->params.pbs_base_log, mem_ptr->params.pbs_level,
|
||||
mem_ptr->params.grouping_factor, carry_count, 1, 0,
|
||||
@@ -455,10 +455,10 @@ __host__ void host_integer_mult_radix_kb(
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
stream, vector_result_sb, radix_lwe_out, bsk, ksk, num_blocks,
|
||||
test_vector_message);
|
||||
luts_message);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
stream, &block_mul_res[big_lwe_size], radix_lwe_out, bsk, ksk, num_blocks,
|
||||
test_vector_carry);
|
||||
luts_carry);
|
||||
|
||||
cuda_memset_async(block_mul_res, 0, big_lwe_size * sizeof(Torus), stream);
|
||||
|
||||
@@ -544,16 +544,16 @@ void apply_lookup_table(Torus *input_ciphertexts, Torus *output_ciphertexts,
|
||||
// when message and carry have to be extracted
|
||||
// for first message_count blocks we need message_acc
|
||||
// for last carry_count blocks we need carry_acc
|
||||
Torus *cur_tvi;
|
||||
Torus *cur_lut_indexes;
|
||||
if (lsb_msb_mode) {
|
||||
cur_tvi = (big_lwe_start_index < lsb_message_blocks_count)
|
||||
? mem_ptr->tvi_lsb_multi_gpu[i]
|
||||
: mem_ptr->tvi_msb_multi_gpu[i];
|
||||
cur_lut_indexes = (big_lwe_start_index < lsb_message_blocks_count)
|
||||
? mem_ptr->lut_indexes_lsb_multi_gpu[i]
|
||||
: mem_ptr->lut_indexes_msb_multi_gpu[i];
|
||||
|
||||
} else {
|
||||
cur_tvi = (big_lwe_start_index < lsb_message_blocks_count)
|
||||
? mem_ptr->tvi_message_multi_gpu[i]
|
||||
: mem_ptr->tvi_carry_multi_gpu[i];
|
||||
cur_lut_indexes = (big_lwe_start_index < lsb_message_blocks_count)
|
||||
? mem_ptr->lut_indexes_message_multi_gpu[i]
|
||||
: mem_ptr->lut_indexes_carry_multi_gpu[i];
|
||||
}
|
||||
|
||||
// execute keyswitch on a current gpu with corresponding input and output
|
||||
@@ -568,7 +568,7 @@ void apply_lookup_table(Torus *input_ciphertexts, Torus *output_ciphertexts,
|
||||
// execute pbs on a current gpu with corresponding input and output
|
||||
cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
this_stream, i, mem_ptr->pbs_output_multi_gpu[i],
|
||||
mem_ptr->test_vector_multi_gpu[i], cur_tvi,
|
||||
mem_ptr->lut_multi_gpu[i], cur_lut_indexes,
|
||||
mem_ptr->pbs_input_multi_gpu[i], mem_ptr->bsk_multi_gpu[i],
|
||||
mem_ptr->pbs_buffer_multi_gpu[i], lwe_dimension, glwe_dimension,
|
||||
polynomial_size, grouping_factor, pbs_base_log, pbs_level,
|
||||
|
||||
@@ -148,7 +148,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory) {
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory) {
|
||||
|
||||
checks_bootstrap_amortized(32, base_log, polynomial_size);
|
||||
|
||||
@@ -159,7 +159,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
(uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes,
|
||||
(uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case 512:
|
||||
@@ -168,7 +168,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
(uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes,
|
||||
(uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case 1024:
|
||||
@@ -177,7 +177,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
(uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes,
|
||||
(uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case 2048:
|
||||
@@ -186,7 +186,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
(uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes,
|
||||
(uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case 4096:
|
||||
@@ -195,7 +195,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
(uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes,
|
||||
(uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case 8192:
|
||||
@@ -204,7 +204,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
(uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes,
|
||||
(uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case 16384:
|
||||
@@ -213,7 +213,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
(uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes,
|
||||
(uint32_t *)lwe_array_in, (uint32_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
default:
|
||||
@@ -228,11 +228,11 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
* - `gpu_index` is the index of the GPU to be used in the kernel launch
|
||||
* - lwe_array_out: output batch of num_samples bootstrapped ciphertexts c =
|
||||
* (a0,..an-1,b) where n is the LWE dimension
|
||||
* - lut_vector: should hold as many test vectors of size polynomial_size
|
||||
* - lut_vector: should hold as many luts of size polynomial_size
|
||||
* as there are input ciphertexts, but actually holds
|
||||
* num_lut_vectors vectors to reduce memory usage
|
||||
* num_luts vectors to reduce memory usage
|
||||
* - lut_vector_indexes: stores the index corresponding to
|
||||
* which test vector of lut_vector to use for each LWE input in
|
||||
* which lut of lut_vector to use for each LWE input in
|
||||
* lwe_array_in
|
||||
* - lwe_array_in: input batch of num_samples LWE ciphertexts, containing n
|
||||
* mask values + 1 body value
|
||||
@@ -244,17 +244,17 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
* bsk is thus a tensor of size (k+1)^2.l.N.n
|
||||
* where l is the number of decomposition levels and
|
||||
* k is the GLWE dimension, N is the polynomial size for
|
||||
* GLWE. The polynomial size for GLWE and the test vector
|
||||
* GLWE. The polynomial size for GLWE and the lut
|
||||
* are the same because they have to be in the same ring
|
||||
* to be multiplied.
|
||||
* - input_lwe_dimension: size of the Torus vector used to encrypt the input
|
||||
* LWE ciphertexts - referred to as n above (~ 600)
|
||||
* - polynomial_size: size of the test polynomial (test vector) and size of the
|
||||
* - polynomial_size: size of the test polynomial (lut) and size of the
|
||||
* GLWE polynomials (~1024) (where `size` refers to the polynomial degree + 1).
|
||||
* - base_log: log of the base used for the gadget matrix - B = 2^base_log (~8)
|
||||
* - level_count: number of decomposition levels in the gadget matrix (~4)
|
||||
* - num_samples: number of encrypted input messages
|
||||
* - num_lut_vectors: parameter to set the actual number of test vectors to be
|
||||
* - num_luts: parameter to set the actual number of luts to be
|
||||
* used
|
||||
* - lwe_idx: the index of the LWE input to consider for the GPU of index
|
||||
* gpu_index. In case of multi-GPU computing, it is assumed that only a part of
|
||||
@@ -292,7 +292,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory) {
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory) {
|
||||
|
||||
checks_bootstrap_amortized(64, base_log, polynomial_size);
|
||||
|
||||
@@ -303,7 +303,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
(uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes,
|
||||
(uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case 512:
|
||||
@@ -312,7 +312,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
(uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes,
|
||||
(uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case 1024:
|
||||
@@ -321,7 +321,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
(uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes,
|
||||
(uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case 2048:
|
||||
@@ -330,7 +330,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
(uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes,
|
||||
(uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case 4096:
|
||||
@@ -339,7 +339,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
(uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes,
|
||||
(uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case 8192:
|
||||
@@ -348,7 +348,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
(uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes,
|
||||
(uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
case 16384:
|
||||
@@ -357,7 +357,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
(uint64_t *)lut_vector, (uint64_t *)lut_vector_indexes,
|
||||
(uint64_t *)lwe_array_in, (uint64_t *)lwe_input_indexes,
|
||||
(double2 *)bootstrapping_key, pbs_buffer, glwe_dimension, lwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_samples, num_lut_vectors,
|
||||
polynomial_size, base_log, level_count, num_samples, num_luts,
|
||||
lwe_idx, max_shared_memory);
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -24,10 +24,10 @@ template <typename Torus, class params, sharedMemDegree SMD>
|
||||
* Uses shared memory to increase performance
|
||||
* - lwe_array_out: output batch of num_samples bootstrapped ciphertexts c =
|
||||
* (a0,..an-1,b) where n is the LWE dimension
|
||||
* - lut_vector: should hold as many test vectors of size polynomial_size
|
||||
* - lut_vector: should hold as many luts of size polynomial_size
|
||||
* as there are input ciphertexts, but actually holds
|
||||
* num_lut_vectors vectors to reduce memory usage
|
||||
* - lut_vector_indexes: stores the index corresponding to which test vector
|
||||
* num_luts vectors to reduce memory usage
|
||||
* - lut_vector_indexes: stores the index corresponding to which lut
|
||||
* to use for each sample in lut_vector
|
||||
* - lwe_array_in: input batch of num_samples LWE ciphertexts, containing n
|
||||
* mask values + 1 body value
|
||||
@@ -37,7 +37,7 @@ template <typename Torus, class params, sharedMemDegree SMD>
|
||||
* == NOSM or PARTIALSM)
|
||||
* - lwe_dimension: size of the Torus vector used to encrypt the input
|
||||
* LWE ciphertexts - referred to as n above (~ 600)
|
||||
* - polynomial_size: size of the test polynomial (test vector) and size of the
|
||||
* - polynomial_size: size of the test polynomial (lut) and size of the
|
||||
* GLWE polynomial (~1024)
|
||||
* - base_log: log base used for the gadget matrix - B = 2^base_log (~8)
|
||||
* - level_count: number of decomposition levels in the gadget matrix (~4)
|
||||
@@ -288,7 +288,7 @@ __host__ void host_bootstrap_amortized(
|
||||
Torus *lwe_input_indexes, double2 *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t num_lut_vectors,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t num_luts,
|
||||
uint32_t lwe_idx, uint32_t max_shared_memory) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
|
||||
@@ -336,7 +336,7 @@ __host__ void host_bootstrap_fast_low_latency(
|
||||
Torus *lwe_input_indexes, double2 *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t num_lut_vectors,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t num_luts,
|
||||
uint32_t max_shared_memory) {
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
|
||||
|
||||
@@ -206,7 +206,7 @@ __host__ void host_fast_multi_bit_pbs(
|
||||
Torus *lwe_input_indexes, uint64_t *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t grouping_factor, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t num_lut_vectors, uint32_t lwe_idx,
|
||||
uint32_t num_samples, uint32_t num_luts, uint32_t lwe_idx,
|
||||
uint32_t max_shared_memory, uint32_t lwe_chunk_size = 0) {
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
|
||||
|
||||
@@ -368,7 +368,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory) {
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory) {
|
||||
|
||||
checks_bootstrap_low_latency(32, glwe_dimension, level_count, base_log,
|
||||
polynomial_size, num_samples);
|
||||
@@ -387,7 +387,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint32_t, Degree<256>>(
|
||||
stream, static_cast<uint32_t *>(lwe_array_out),
|
||||
@@ -398,7 +398,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
case 512:
|
||||
if (verify_cuda_bootstrap_fast_low_latency_grid_size<uint32_t,
|
||||
@@ -413,7 +413,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint32_t, Degree<512>>(
|
||||
stream, static_cast<uint32_t *>(lwe_array_out),
|
||||
@@ -424,7 +424,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
case 1024:
|
||||
if (verify_cuda_bootstrap_fast_low_latency_grid_size<uint32_t,
|
||||
@@ -439,7 +439,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint32_t, Degree<1024>>(
|
||||
stream, static_cast<uint32_t *>(lwe_array_out),
|
||||
@@ -450,7 +450,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
case 2048:
|
||||
if (verify_cuda_bootstrap_fast_low_latency_grid_size<uint32_t,
|
||||
@@ -465,7 +465,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint32_t, Degree<2048>>(
|
||||
stream, static_cast<uint32_t *>(lwe_array_out),
|
||||
@@ -476,7 +476,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
case 4096:
|
||||
if (verify_cuda_bootstrap_fast_low_latency_grid_size<uint32_t,
|
||||
@@ -491,7 +491,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint32_t, Degree<4096>>(
|
||||
stream, static_cast<uint32_t *>(lwe_array_out),
|
||||
@@ -502,7 +502,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
case 8192:
|
||||
if (verify_cuda_bootstrap_fast_low_latency_grid_size<uint32_t,
|
||||
@@ -517,7 +517,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint32_t, Degree<8192>>(
|
||||
stream, static_cast<uint32_t *>(lwe_array_out),
|
||||
@@ -528,7 +528,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
case 16384:
|
||||
if (verify_cuda_bootstrap_fast_low_latency_grid_size<
|
||||
@@ -543,7 +543,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint32_t, Degree<16384>>(
|
||||
stream, static_cast<uint32_t *>(lwe_array_out),
|
||||
@@ -554,7 +554,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
static_cast<uint32_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -572,11 +572,11 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
* - `gpu_index` is the index of the GPU to be used in the kernel launch
|
||||
* - lwe_array_out: output batch of num_samples bootstrapped ciphertexts c =
|
||||
* (a0,..an-1,b) where n is the LWE dimension
|
||||
* - lut_vector: should hold as many test vectors of size polynomial_size
|
||||
* - lut_vector: should hold as many luts of size polynomial_size
|
||||
* as there are input ciphertexts, but actually holds
|
||||
* num_lut_vectors vectors to reduce memory usage
|
||||
* num_luts vectors to reduce memory usage
|
||||
* - lut_vector_indexes: stores the index corresponding to
|
||||
* which test vector to use for each sample in
|
||||
* which lut to use for each sample in
|
||||
* lut_vector
|
||||
* - lwe_array_in: input batch of num_samples LWE ciphertexts, containing n
|
||||
* mask values + 1 body value
|
||||
@@ -588,7 +588,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
* bsk is thus a tensor of size (k+1)^2.l.N.n
|
||||
* where l is the number of decomposition levels and
|
||||
* k is the GLWE dimension, N is the polynomial size for
|
||||
* GLWE. The polynomial size for GLWE and the test vector
|
||||
* GLWE. The polynomial size for GLWE and the lut
|
||||
* are the same because they have to be in the same ring
|
||||
* to be multiplied.
|
||||
* - lwe_dimension: size of the Torus vector used to encrypt the input
|
||||
@@ -596,12 +596,12 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
|
||||
* - glwe_dimension: size of the polynomial vector used to encrypt the LUT
|
||||
* GLWE ciphertexts - referred to as k above. Only the value 1 is supported for
|
||||
* this parameter.
|
||||
* - polynomial_size: size of the test polynomial (test vector) and size of the
|
||||
* - polynomial_size: size of the test polynomial (lut) and size of the
|
||||
* GLWE polynomial (~1024)
|
||||
* - base_log: log base used for the gadget matrix - B = 2^base_log (~8)
|
||||
* - level_count: number of decomposition levels in the gadget matrix (~4)
|
||||
* - num_samples: number of encrypted input messages
|
||||
* - num_lut_vectors: parameter to set the actual number of test vectors to be
|
||||
* - num_luts: parameter to set the actual number of luts to be
|
||||
* used
|
||||
* - lwe_idx: the index of the LWE input to consider for the GPU of index
|
||||
* gpu_index. In case of multi-GPU computing, it is assumed that only a part of
|
||||
@@ -643,7 +643,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_lut_vectors, uint32_t lwe_idx, uint32_t max_shared_memory) {
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory) {
|
||||
checks_bootstrap_low_latency(64, glwe_dimension, level_count, base_log,
|
||||
polynomial_size, num_samples);
|
||||
|
||||
@@ -661,7 +661,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint64_t, Degree<256>>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -672,7 +672,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
case 512:
|
||||
if (verify_cuda_bootstrap_fast_low_latency_grid_size<uint64_t,
|
||||
@@ -687,7 +687,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint64_t, Degree<512>>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -698,7 +698,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
case 1024:
|
||||
if (verify_cuda_bootstrap_fast_low_latency_grid_size<uint32_t,
|
||||
@@ -713,7 +713,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint64_t, Degree<1024>>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -724,7 +724,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
case 2048:
|
||||
if (verify_cuda_bootstrap_fast_low_latency_grid_size<uint32_t,
|
||||
@@ -739,7 +739,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint64_t, Degree<2048>>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -750,7 +750,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
case 4096:
|
||||
if (verify_cuda_bootstrap_fast_low_latency_grid_size<uint32_t,
|
||||
@@ -765,7 +765,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint64_t, Degree<4096>>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -776,7 +776,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
case 8192:
|
||||
if (verify_cuda_bootstrap_fast_low_latency_grid_size<uint32_t,
|
||||
@@ -791,7 +791,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint64_t, Degree<8192>>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -802,7 +802,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
break;
|
||||
case 16384:
|
||||
if (verify_cuda_bootstrap_fast_low_latency_grid_size<
|
||||
@@ -817,7 +817,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
else
|
||||
host_bootstrap_low_latency<uint64_t, Degree<16384>>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -828,7 +828,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<double2 *>(bootstrapping_key), pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_lut_vectors, max_shared_memory);
|
||||
num_luts, max_shared_memory);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -435,7 +435,7 @@ __host__ void host_bootstrap_low_latency(
|
||||
Torus *lwe_input_indexes, double2 *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t num_lut_vectors,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t num_luts,
|
||||
uint32_t max_shared_memory) {
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t grouping_factor, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t num_lut_vectors, uint32_t lwe_idx,
|
||||
uint32_t num_samples, uint32_t num_luts, uint32_t lwe_idx,
|
||||
uint32_t max_shared_memory, uint32_t lwe_chunk_size) {
|
||||
|
||||
checks_multi_bit_pbs(polynomial_size);
|
||||
@@ -38,7 +38,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
} else {
|
||||
host_multi_bit_pbs<uint64_t, int64_t, AmortizedDegree<256>>(
|
||||
@@ -50,7 +50,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
}
|
||||
break;
|
||||
@@ -67,7 +67,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
} else {
|
||||
host_multi_bit_pbs<uint64_t, int64_t, AmortizedDegree<512>>(
|
||||
@@ -79,7 +79,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
}
|
||||
break;
|
||||
@@ -96,7 +96,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
} else {
|
||||
host_multi_bit_pbs<uint64_t, int64_t, AmortizedDegree<1024>>(
|
||||
@@ -108,7 +108,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
}
|
||||
break;
|
||||
@@ -125,7 +125,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
} else {
|
||||
host_multi_bit_pbs<uint64_t, int64_t, AmortizedDegree<2048>>(
|
||||
@@ -137,7 +137,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
}
|
||||
break;
|
||||
@@ -154,7 +154,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
} else {
|
||||
host_multi_bit_pbs<uint64_t, int64_t, AmortizedDegree<4096>>(
|
||||
@@ -166,7 +166,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
}
|
||||
break;
|
||||
@@ -183,7 +183,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
} else {
|
||||
host_multi_bit_pbs<uint64_t, int64_t, AmortizedDegree<8192>>(
|
||||
@@ -195,7 +195,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
}
|
||||
break;
|
||||
@@ -212,7 +212,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
} else {
|
||||
host_multi_bit_pbs<uint64_t, int64_t, AmortizedDegree<16384>>(
|
||||
@@ -224,7 +224,7 @@ void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
|
||||
static_cast<uint64_t *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(bootstrapping_key), pbs_buffer,
|
||||
glwe_dimension, lwe_dimension, polynomial_size, grouping_factor,
|
||||
base_log, level_count, num_samples, num_lut_vectors, lwe_idx,
|
||||
base_log, level_count, num_samples, num_luts, lwe_idx,
|
||||
max_shared_memory, lwe_chunk_size);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -395,7 +395,7 @@ __host__ void host_multi_bit_pbs(
|
||||
Torus *lwe_input_indexes, uint64_t *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t grouping_factor, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t num_lut_vectors, uint32_t lwe_idx,
|
||||
uint32_t num_samples, uint32_t num_luts, uint32_t lwe_idx,
|
||||
uint32_t max_shared_memory, uint32_t lwe_chunk_size = 0) {
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ extern "C" {
|
||||
pub fn cuda_drop_async(ptr: *mut c_void, v_stream: *const c_void) -> i32;
|
||||
|
||||
/// Free memory for pointer `ptr` on GPU `gpu_index` synchronously
|
||||
pub fn cuda_drop(ptr: *mut c_void) -> i32;
|
||||
pub fn cuda_drop(ptr: *mut c_void, gpu_index: u32) -> i32;
|
||||
|
||||
/// Get the maximum amount of shared memory on GPU `gpu_index`
|
||||
pub fn cuda_get_max_shared_memory(gpu_index: u32) -> i32;
|
||||
|
||||
@@ -138,6 +138,11 @@ workflow = "pbs_benchmark.yml"
|
||||
profile = "bench"
|
||||
check_run_name = "PBS CPU AWS Benchmarks"
|
||||
|
||||
[command.pbs_gpu_bench]
|
||||
workflow = "pbs_gpu_benchmark.yml"
|
||||
profile = "gpu-bench"
|
||||
check_run_name = "PBS GPU AWS Benchmarks"
|
||||
|
||||
[command.wasm_client_bench]
|
||||
workflow = "wasm_client_benchmark.yml"
|
||||
profile = "cpu-small"
|
||||
|
||||
@@ -7,10 +7,14 @@ function usage() {
|
||||
echo
|
||||
echo "--help Print this message"
|
||||
echo "--build-only Pass to only build the tests without running them"
|
||||
echo "--gpu Enable GPU support"
|
||||
echo "--forward-compat Indicate if we have forward compatibility enabled"
|
||||
echo
|
||||
}
|
||||
|
||||
BUILD_ONLY=0
|
||||
WITH_FEATURE_GPU="OFF"
|
||||
WITH_FORWARD_COMPAT="OFF"
|
||||
|
||||
while [ -n "$1" ]
|
||||
do
|
||||
@@ -24,6 +28,15 @@ do
|
||||
BUILD_ONLY=1
|
||||
;;
|
||||
|
||||
"--gpu" )
|
||||
WITH_FEATURE_GPU="ON"
|
||||
;;
|
||||
|
||||
"--forward-compat" )
|
||||
shift
|
||||
WITH_FORWARD_COMPAT="$1"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unknown param : $1"
|
||||
exit 1
|
||||
@@ -40,7 +53,9 @@ mkdir -p "${TFHE_BUILD_DIR}"
|
||||
|
||||
cd "${TFHE_BUILD_DIR}"
|
||||
|
||||
cmake .. -DCMAKE_BUILD_TYPE=RELEASE -DCARGO_PROFILE="${CARGO_PROFILE}"
|
||||
cmake .. -DCMAKE_BUILD_TYPE=RELEASE -DCARGO_PROFILE="${CARGO_PROFILE}" \
|
||||
-DWITH_FEATURE_GPU="${WITH_FEATURE_GPU}" \
|
||||
-DWITH_FORWARD_COMPATIBILITY="${WITH_FORWARD_COMPAT}"
|
||||
|
||||
make -j
|
||||
|
||||
@@ -55,5 +70,8 @@ if [[ $(uname) == "Darwin" ]]; then
|
||||
nproc_bin="sysctl -n hw.logicalcpu"
|
||||
fi
|
||||
|
||||
# Let's go parallel
|
||||
ARGS="-j$(${nproc_bin})" make test
|
||||
if [ "${WITH_FEATURE_GPU}" == "ON" ]; then
|
||||
ctest --output-on-failure --test-dir "." --parallel "$(${nproc_bin})" --tests-regex ".*cuda.*"
|
||||
else
|
||||
ctest --output-on-failure --test-dir "." --parallel "$(${nproc_bin})" --exclude-regex ".*cuda.*"
|
||||
fi
|
||||
|
||||
@@ -12,6 +12,7 @@ function usage() {
|
||||
echo "--signed-only Run only signed integer tests, by default both signed and unsigned tests are run"
|
||||
echo "--cargo-profile The cargo profile used to build tests"
|
||||
echo "--avx512-support Set to ON to enable avx512"
|
||||
echo "--tfhe-package The package spec like tfhe@0.4.2, default=tfhe"
|
||||
echo
|
||||
}
|
||||
|
||||
@@ -23,6 +24,7 @@ signed=""
|
||||
not_signed=""
|
||||
cargo_profile="release"
|
||||
avx512_feature=""
|
||||
tfhe_package="tfhe"
|
||||
|
||||
while [ -n "$1" ]
|
||||
do
|
||||
@@ -64,6 +66,11 @@ do
|
||||
fi
|
||||
;;
|
||||
|
||||
"--tfhe-package" )
|
||||
shift
|
||||
tfhe_package="$1"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unknown param : $1"
|
||||
exit 1
|
||||
@@ -153,7 +160,7 @@ fi
|
||||
cargo "${RUST_TOOLCHAIN}" nextest run \
|
||||
--tests \
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package tfhe \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--features="${ARCH_FEATURE}",integer,internal-keycache,"${avx512_feature}" \
|
||||
--test-threads "${test_threads}" \
|
||||
@@ -162,7 +169,7 @@ cargo "${RUST_TOOLCHAIN}" nextest run \
|
||||
if [[ "${multi_bit}" == "" ]]; then
|
||||
cargo "${RUST_TOOLCHAIN}" test \
|
||||
--profile "${cargo_profile}" \
|
||||
--package tfhe \
|
||||
--package "${tfhe_package}" \
|
||||
--features="${ARCH_FEATURE}",integer,internal-keycache,"${avx512_feature}" \
|
||||
--doc \
|
||||
-- --test-threads="${doctest_threads}" integer::
|
||||
|
||||
@@ -9,12 +9,14 @@ function usage() {
|
||||
echo "--rust-toolchain The toolchain to run the tests with default: stable"
|
||||
echo "--multi-bit Run multi-bit tests only: default off"
|
||||
echo "--cargo-profile The cargo profile used to build tests"
|
||||
echo "--tfhe-package The package spec like tfhe@0.4.2, default=tfhe"
|
||||
echo
|
||||
}
|
||||
|
||||
RUST_TOOLCHAIN="+stable"
|
||||
multi_bit=""
|
||||
cargo_profile="release"
|
||||
tfhe_package="tfhe"
|
||||
|
||||
while [ -n "$1" ]
|
||||
do
|
||||
@@ -38,6 +40,11 @@ do
|
||||
cargo_profile="$1"
|
||||
;;
|
||||
|
||||
"--tfhe-package" )
|
||||
shift
|
||||
tfhe_package="$1"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unknown param : $1"
|
||||
exit 1
|
||||
@@ -111,7 +118,7 @@ and not test(~smart_add_and_mul)""" # This test is too slow
|
||||
cargo "${RUST_TOOLCHAIN}" nextest run \
|
||||
--tests \
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package tfhe \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache \
|
||||
--test-threads "${n_threads_small}" \
|
||||
@@ -128,7 +135,7 @@ and not test(~smart_add_and_mul)"""
|
||||
cargo "${RUST_TOOLCHAIN}" nextest run \
|
||||
--tests \
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package tfhe \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache \
|
||||
--test-threads "${n_threads_big}" \
|
||||
@@ -137,7 +144,7 @@ and not test(~smart_add_and_mul)"""
|
||||
if [[ "${multi_bit}" == "" ]]; then
|
||||
cargo "${RUST_TOOLCHAIN}" test \
|
||||
--profile "${cargo_profile}" \
|
||||
--package tfhe \
|
||||
--package "${tfhe_package}" \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache \
|
||||
--doc \
|
||||
-- shortint::
|
||||
@@ -177,7 +184,7 @@ and not test(~smart_add_and_mul)""" # This test is too slow
|
||||
cargo "${RUST_TOOLCHAIN}" nextest run \
|
||||
--tests \
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package tfhe \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache \
|
||||
--test-threads "$(${nproc_bin})" \
|
||||
@@ -186,7 +193,7 @@ and not test(~smart_add_and_mul)""" # This test is too slow
|
||||
if [[ "${multi_bit}" == "" ]]; then
|
||||
cargo "${RUST_TOOLCHAIN}" test \
|
||||
--profile "${cargo_profile}" \
|
||||
--package tfhe \
|
||||
--package "${tfhe_package}" \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache \
|
||||
--doc \
|
||||
-- --test-threads="$(${nproc_bin})" shortint::
|
||||
|
||||
@@ -196,7 +196,7 @@ fn find_contiguous_doc_test<'a>(
|
||||
|
||||
fn find_contiguous_part_in_doc_test_or_comment(
|
||||
part_is_code_block: bool,
|
||||
full_doc_comment_content: &Vec<CommentContent>,
|
||||
full_doc_comment_content: &[CommentContent],
|
||||
part_start_idx: usize,
|
||||
) -> (usize, usize) {
|
||||
let mut next_line_idx = part_start_idx + 1;
|
||||
@@ -348,7 +348,7 @@ fn process_doc_lines_until_impossible<'a>(
|
||||
}
|
||||
|
||||
fn process_non_doc_lines_until_impossible(
|
||||
lines: &Vec<&str>,
|
||||
lines: &[&str],
|
||||
rewritten_content: &mut String,
|
||||
mut line_idx: usize,
|
||||
) -> usize {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tfhe"
|
||||
version = "0.5.0"
|
||||
version = "0.5.5"
|
||||
edition = "2021"
|
||||
readme = "../README.md"
|
||||
keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
|
||||
@@ -46,7 +46,7 @@ hex = "0.4.3"
|
||||
# End regex-engine deps
|
||||
|
||||
[build-dependencies]
|
||||
cbindgen = { version = "0.26.0", optional = true }
|
||||
cbindgen = { package = "tfhe-c-api-bindgen", version = "0.26.1", optional = true }
|
||||
|
||||
[dependencies]
|
||||
concrete-csprng = { version = "0.4.0", path = "../concrete-csprng", features = [
|
||||
@@ -60,8 +60,8 @@ lazy_static = { version = "1.4.0", optional = true }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
rayon = { version = "1.5.0" }
|
||||
bincode = "1.3.3"
|
||||
concrete-fft = { version = "0.3.0", features = ["serde", "fft128"] }
|
||||
pulp = "0.13"
|
||||
concrete-fft = { version = "0.4.0", features = ["serde", "fft128"] }
|
||||
pulp = "0.18.8"
|
||||
tfhe-cuda-backend = { version = "0.1.2", path = "../backends/tfhe-cuda-backend", optional = true }
|
||||
aligned-vec = { version = "0.5", features = ["serde"] }
|
||||
dyn-stack = { version = "0.9" }
|
||||
@@ -69,6 +69,7 @@ paste = "1.0.7"
|
||||
fs2 = { version = "0.4.3", optional = true }
|
||||
# While we wait for repeat_n in rust standard library
|
||||
itertools = "0.11.0"
|
||||
next_tfhe = { package = "tfhe", version = "0.6.1", optional = true }
|
||||
|
||||
# wasm deps
|
||||
wasm-bindgen = { version = "0.2.86", features = [
|
||||
@@ -79,21 +80,33 @@ js-sys = { version = "0.3", optional = true }
|
||||
console_error_panic_hook = { version = "0.1.7", optional = true }
|
||||
serde-wasm-bindgen = { version = "0.6.0", optional = true }
|
||||
getrandom = { version = "0.2.8", optional = true }
|
||||
bytemuck = "1.13.1"
|
||||
bytemuck = "1.14.3"
|
||||
|
||||
[features]
|
||||
boolean = []
|
||||
shortint = []
|
||||
integer = ["shortint"]
|
||||
internal-keycache = ["dep:lazy_static", "dep:fs2"]
|
||||
forward_compatibility = ["dep:tfhe-c-api-dynamic-buffer", "dep:next_tfhe"]
|
||||
|
||||
boolean = ["next_tfhe?/boolean"]
|
||||
shortint = ["next_tfhe?/shortint"]
|
||||
integer = ["shortint", "next_tfhe?/integer"]
|
||||
internal-keycache = [
|
||||
"dep:lazy_static",
|
||||
"dep:fs2",
|
||||
"next_tfhe?/internal-keycache",
|
||||
]
|
||||
gpu = ["tfhe-cuda-backend"]
|
||||
|
||||
# Experimental section
|
||||
experimental = []
|
||||
experimental-force_fft_algo_dif4 = []
|
||||
experimental-force_fft_algo_dif4 = [
|
||||
"next_tfhe?/experimental-force_fft_algo_dif4",
|
||||
]
|
||||
# End experimental section
|
||||
|
||||
__c_api = ["dep:cbindgen", "dep:tfhe-c-api-dynamic-buffer"]
|
||||
__c_api = [
|
||||
"dep:cbindgen",
|
||||
"dep:tfhe-c-api-dynamic-buffer",
|
||||
"next_tfhe?/__force_skip_cbindgen",
|
||||
]
|
||||
# For the semver trick to skip the build.rs
|
||||
__force_skip_cbindgen = []
|
||||
boolean-c-api = ["boolean", "__c_api"]
|
||||
@@ -114,32 +127,45 @@ integer-client-js-wasm-api = ["integer", "__wasm_api"]
|
||||
high-level-client-js-wasm-api = ["boolean", "shortint", "integer", "__wasm_api"]
|
||||
parallel-wasm-api = ["dep:wasm-bindgen-rayon"]
|
||||
|
||||
nightly-avx512 = ["concrete-fft/nightly", "pulp/nightly"]
|
||||
nightly-avx512 = [
|
||||
"concrete-fft/nightly",
|
||||
"pulp/nightly",
|
||||
"next_tfhe?/nightly-avx512",
|
||||
]
|
||||
|
||||
# Enable the x86_64 specific accelerated implementation of the random generator for the default
|
||||
# backend
|
||||
generator_x86_64_aesni = ["concrete-csprng/generator_x86_64_aesni"]
|
||||
generator_x86_64_aesni = [
|
||||
"concrete-csprng/generator_x86_64_aesni",
|
||||
"next_tfhe?/generator_x86_64_aesni",
|
||||
]
|
||||
|
||||
# Enable the aarch64 specific accelerated implementation of the random generator for the default
|
||||
# backend
|
||||
generator_aarch64_aes = ["concrete-csprng/generator_aarch64_aes"]
|
||||
generator_aarch64_aes = [
|
||||
"concrete-csprng/generator_aarch64_aes",
|
||||
"next_tfhe?/generator_aarch64_aes",
|
||||
]
|
||||
|
||||
# Private features
|
||||
__profiling = []
|
||||
__coverage = []
|
||||
|
||||
seeder_unix = ["concrete-csprng/seeder_unix"]
|
||||
seeder_x86_64_rdseed = ["concrete-csprng/seeder_x86_64_rdseed"]
|
||||
seeder_unix = ["concrete-csprng/seeder_unix", "next_tfhe?/seeder_unix"]
|
||||
seeder_x86_64_rdseed = [
|
||||
"concrete-csprng/seeder_x86_64_rdseed",
|
||||
"next_tfhe?/seeder_x86_64_rdseed",
|
||||
]
|
||||
|
||||
# These target_arch features enable a set of public features for tfhe if users want a known
|
||||
# good/working configuration for tfhe.
|
||||
# For a target_arch that does not yet have such a feature, one can still enable features manually or
|
||||
# create a feature for said target_arch to make its use simpler.
|
||||
x86_64 = ["generator_x86_64_aesni", "seeder_x86_64_rdseed"]
|
||||
x86_64-unix = ["x86_64", "seeder_unix"]
|
||||
x86_64 = ["generator_x86_64_aesni", "seeder_x86_64_rdseed", "next_tfhe?/x86_64"]
|
||||
x86_64-unix = ["x86_64", "seeder_unix", "next_tfhe?/x86_64-unix"]
|
||||
|
||||
aarch64 = ["generator_aarch64_aes"]
|
||||
aarch64-unix = ["aarch64", "seeder_unix"]
|
||||
aarch64 = ["generator_aarch64_aes", "next_tfhe?/aarch64"]
|
||||
aarch64-unix = ["aarch64", "seeder_unix", "next_tfhe?/aarch64-unix"]
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
# TODO: manage builds for docs.rs based on their documentation https://docs.rs/about
|
||||
|
||||
@@ -8,6 +8,7 @@ use serde::Serialize;
|
||||
use tfhe::boolean::parameters::{
|
||||
BooleanParameters, DEFAULT_PARAMETERS, PARAMETERS_ERROR_PROB_2_POW_MINUS_165,
|
||||
};
|
||||
|
||||
use tfhe::core_crypto::prelude::*;
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::shortint::parameters::*;
|
||||
@@ -43,29 +44,6 @@ const BOOLEAN_BENCH_PARAMS: [(&str, BooleanParameters); 2] = [
|
||||
),
|
||||
];
|
||||
|
||||
criterion_group!(
|
||||
name = pbs_group;
|
||||
config = Criterion::default().sample_size(2000);
|
||||
targets = mem_optimized_pbs::<u64>, mem_optimized_pbs::<u32>
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
name = multi_bit_pbs_group;
|
||||
config = Criterion::default().sample_size(2000);
|
||||
targets = multi_bit_pbs::<u64>,
|
||||
multi_bit_pbs::<u32>,
|
||||
multi_bit_deterministic_pbs::<u64>,
|
||||
multi_bit_deterministic_pbs::<u32>,
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
name = pbs_throughput_group;
|
||||
config = Criterion::default().sample_size(100);
|
||||
targets = pbs_throughput::<u64>, pbs_throughput::<u32>
|
||||
);
|
||||
|
||||
criterion_main!(pbs_group, multi_bit_pbs_group, pbs_throughput_group);
|
||||
|
||||
fn benchmark_parameters<Scalar: UnsignedInteger>() -> Vec<(String, CryptoParametersRecord<Scalar>)>
|
||||
{
|
||||
if Scalar::BITS == 64 {
|
||||
@@ -121,25 +99,35 @@ fn throughput_benchmark_parameters<Scalar: UnsignedInteger>(
|
||||
fn multi_bit_benchmark_parameters<Scalar: UnsignedInteger + Default>(
|
||||
) -> Vec<(String, CryptoParametersRecord<Scalar>, LweBskGroupingFactor)> {
|
||||
if Scalar::BITS == 64 {
|
||||
vec![
|
||||
PARAM_MULTI_BIT_MESSAGE_1_CARRY_1_GROUP_2_KS_PBS,
|
||||
PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS,
|
||||
PARAM_MULTI_BIT_MESSAGE_3_CARRY_3_GROUP_2_KS_PBS,
|
||||
PARAM_MULTI_BIT_MESSAGE_1_CARRY_1_GROUP_3_KS_PBS,
|
||||
PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS,
|
||||
PARAM_MULTI_BIT_MESSAGE_3_CARRY_3_GROUP_3_KS_PBS,
|
||||
]
|
||||
.iter()
|
||||
.map(|params| {
|
||||
(
|
||||
params.name(),
|
||||
<MultiBitPBSParameters as Into<PBSParameters>>::into(*params)
|
||||
.to_owned()
|
||||
.into(),
|
||||
params.grouping_factor,
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
let parameters = if cfg!(feature = "gpu") {
|
||||
vec![
|
||||
PARAM_MULTI_BIT_MESSAGE_1_CARRY_1_GROUP_3_KS_PBS,
|
||||
PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS,
|
||||
PARAM_MULTI_BIT_MESSAGE_3_CARRY_3_GROUP_3_KS_PBS,
|
||||
]
|
||||
} else {
|
||||
vec![
|
||||
PARAM_MULTI_BIT_MESSAGE_1_CARRY_1_GROUP_2_KS_PBS,
|
||||
PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS,
|
||||
PARAM_MULTI_BIT_MESSAGE_3_CARRY_3_GROUP_2_KS_PBS,
|
||||
PARAM_MULTI_BIT_MESSAGE_1_CARRY_1_GROUP_3_KS_PBS,
|
||||
PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS,
|
||||
PARAM_MULTI_BIT_MESSAGE_3_CARRY_3_GROUP_3_KS_PBS,
|
||||
]
|
||||
};
|
||||
|
||||
parameters
|
||||
.iter()
|
||||
.map(|params| {
|
||||
(
|
||||
params.name(),
|
||||
<MultiBitPBSParameters as Into<PBSParameters>>::into(*params)
|
||||
.to_owned()
|
||||
.into(),
|
||||
params.grouping_factor,
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
// For now there are no parameters available to test multi bit PBS on 32 bits.
|
||||
vec![]
|
||||
@@ -547,3 +535,294 @@ fn pbs_throughput<Scalar: UnsignedTorus + CastInto<usize> + Sync + Send + Serial
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
mod cuda {
|
||||
use super::{benchmark_parameters, multi_bit_benchmark_parameters};
|
||||
use crate::utilities::{write_to_json, OperatorType};
|
||||
use criterion::{black_box, criterion_group, Criterion};
|
||||
use serde::Serialize;
|
||||
use tfhe::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use tfhe::core_crypto::gpu::lwe_bootstrap_key::CudaLweBootstrapKey;
|
||||
use tfhe::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use tfhe::core_crypto::gpu::lwe_multi_bit_bootstrap_key::CudaLweMultiBitBootstrapKey;
|
||||
use tfhe::core_crypto::gpu::{
|
||||
cuda_multi_bit_programmable_bootstrap_lwe_ciphertext,
|
||||
cuda_programmable_bootstrap_lwe_ciphertext, CudaDevice, CudaStream,
|
||||
};
|
||||
use tfhe::core_crypto::prelude::*;
|
||||
|
||||
fn cuda_pbs<Scalar: UnsignedTorus + CastInto<usize> + Serialize>(c: &mut Criterion) {
|
||||
let bench_name = "cuda::pbs";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
// Create the PRNG
|
||||
let mut seeder = new_seeder();
|
||||
let seeder = seeder.as_mut();
|
||||
let mut encryption_generator =
|
||||
EncryptionRandomGenerator::<ActivatedRandomGenerator>::new(seeder.seed(), seeder);
|
||||
let mut secret_generator =
|
||||
SecretRandomGenerator::<ActivatedRandomGenerator>::new(seeder.seed());
|
||||
|
||||
let gpu_index = 0;
|
||||
let device = CudaDevice::new(gpu_index);
|
||||
let stream = CudaStream::new_unchecked(device);
|
||||
|
||||
for (name, params) in benchmark_parameters::<Scalar>().iter() {
|
||||
// Create the LweSecretKey
|
||||
let input_lwe_secret_key = allocate_and_generate_new_binary_lwe_secret_key(
|
||||
params.lwe_dimension.unwrap(),
|
||||
&mut secret_generator,
|
||||
);
|
||||
let output_glwe_secret_key: GlweSecretKeyOwned<Scalar> =
|
||||
allocate_and_generate_new_binary_glwe_secret_key(
|
||||
params.glwe_dimension.unwrap(),
|
||||
params.polynomial_size.unwrap(),
|
||||
&mut secret_generator,
|
||||
);
|
||||
let output_lwe_secret_key = output_glwe_secret_key.into_lwe_secret_key();
|
||||
|
||||
let bsk = LweBootstrapKey::new(
|
||||
Scalar::ZERO,
|
||||
params.glwe_dimension.unwrap().to_glwe_size(),
|
||||
params.polynomial_size.unwrap(),
|
||||
params.pbs_base_log.unwrap(),
|
||||
params.pbs_level.unwrap(),
|
||||
params.lwe_dimension.unwrap(),
|
||||
tfhe::core_crypto::prelude::CiphertextModulus::new_native(),
|
||||
);
|
||||
let bsk_gpu = CudaLweBootstrapKey::from_lwe_bootstrap_key(&bsk, &stream);
|
||||
|
||||
// Allocate a new LweCiphertext and encrypt our plaintext
|
||||
let lwe_ciphertext_in = allocate_and_encrypt_new_lwe_ciphertext(
|
||||
&input_lwe_secret_key,
|
||||
Plaintext(Scalar::ZERO),
|
||||
params.lwe_modular_std_dev.unwrap(),
|
||||
tfhe::core_crypto::prelude::CiphertextModulus::new_native(),
|
||||
&mut encryption_generator,
|
||||
);
|
||||
let lwe_ciphertext_in_gpu =
|
||||
CudaLweCiphertextList::from_lwe_ciphertext(&lwe_ciphertext_in, &stream);
|
||||
|
||||
let accumulator = GlweCiphertext::new(
|
||||
Scalar::ZERO,
|
||||
params.glwe_dimension.unwrap().to_glwe_size(),
|
||||
params.polynomial_size.unwrap(),
|
||||
tfhe::core_crypto::prelude::CiphertextModulus::new_native(),
|
||||
);
|
||||
let accumulator_gpu =
|
||||
CudaGlweCiphertextList::from_glwe_ciphertext(&accumulator, &stream);
|
||||
|
||||
// Allocate the LweCiphertext to store the result of the PBS
|
||||
let mut out_pbs_ct = LweCiphertext::new(
|
||||
Scalar::ZERO,
|
||||
output_lwe_secret_key.lwe_dimension().to_lwe_size(),
|
||||
tfhe::core_crypto::prelude::CiphertextModulus::new_native(),
|
||||
);
|
||||
let mut out_pbs_ct_gpu =
|
||||
CudaLweCiphertextList::from_lwe_ciphertext(&out_pbs_ct, &stream);
|
||||
let h_indexes = &[Scalar::ZERO];
|
||||
stream.synchronize();
|
||||
let mut d_input_indexes = unsafe { stream.malloc_async::<Scalar>(1u32) };
|
||||
let mut d_output_indexes = unsafe { stream.malloc_async::<Scalar>(1u32) };
|
||||
let mut d_lut_indexes = unsafe { stream.malloc_async::<Scalar>(1u32) };
|
||||
unsafe {
|
||||
stream.copy_to_gpu_async(&mut d_input_indexes, h_indexes.as_ref());
|
||||
stream.copy_to_gpu_async(&mut d_output_indexes, h_indexes.as_ref());
|
||||
stream.copy_to_gpu_async(&mut d_input_indexes, h_indexes.as_ref());
|
||||
}
|
||||
stream.synchronize();
|
||||
|
||||
let id = format!("{bench_name}_{name}");
|
||||
{
|
||||
bench_group.bench_function(&id, |b| {
|
||||
b.iter(|| {
|
||||
cuda_programmable_bootstrap_lwe_ciphertext(
|
||||
&lwe_ciphertext_in_gpu,
|
||||
&mut out_pbs_ct_gpu,
|
||||
&accumulator_gpu,
|
||||
&d_lut_indexes,
|
||||
&d_output_indexes,
|
||||
&d_input_indexes,
|
||||
LweCiphertextCount(1),
|
||||
&bsk_gpu,
|
||||
&stream,
|
||||
);
|
||||
black_box(&mut out_pbs_ct_gpu);
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
let bit_size = (params.message_modulus.unwrap_or(2) as u32).ilog2();
|
||||
write_to_json(
|
||||
&id,
|
||||
*params,
|
||||
name,
|
||||
"pbs",
|
||||
&OperatorType::Atomic,
|
||||
bit_size,
|
||||
vec![bit_size],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn cuda_multi_bit_pbs<
|
||||
Scalar: UnsignedTorus + CastInto<usize> + CastFrom<usize> + Default + Serialize + Sync,
|
||||
>(
|
||||
c: &mut Criterion,
|
||||
) {
|
||||
let bench_name = "cuda::pbs";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
// Create the PRNG
|
||||
let mut seeder = new_seeder();
|
||||
let seeder = seeder.as_mut();
|
||||
let mut encryption_generator =
|
||||
EncryptionRandomGenerator::<ActivatedRandomGenerator>::new(seeder.seed(), seeder);
|
||||
let mut secret_generator =
|
||||
SecretRandomGenerator::<ActivatedRandomGenerator>::new(seeder.seed());
|
||||
|
||||
let gpu_index = 0;
|
||||
let device = CudaDevice::new(gpu_index);
|
||||
let stream = CudaStream::new_unchecked(device);
|
||||
|
||||
for (name, params, grouping_factor) in multi_bit_benchmark_parameters::<Scalar>().iter() {
|
||||
// Create the LweSecretKey
|
||||
let input_lwe_secret_key = allocate_and_generate_new_binary_lwe_secret_key(
|
||||
params.lwe_dimension.unwrap(),
|
||||
&mut secret_generator,
|
||||
);
|
||||
let output_glwe_secret_key: GlweSecretKeyOwned<Scalar> =
|
||||
allocate_and_generate_new_binary_glwe_secret_key(
|
||||
params.glwe_dimension.unwrap(),
|
||||
params.polynomial_size.unwrap(),
|
||||
&mut secret_generator,
|
||||
);
|
||||
let output_lwe_secret_key = output_glwe_secret_key.into_lwe_secret_key();
|
||||
|
||||
let multi_bit_bsk = LweMultiBitBootstrapKey::new(
|
||||
Scalar::ZERO,
|
||||
params.glwe_dimension.unwrap().to_glwe_size(),
|
||||
params.polynomial_size.unwrap(),
|
||||
params.pbs_base_log.unwrap(),
|
||||
params.pbs_level.unwrap(),
|
||||
params.lwe_dimension.unwrap(),
|
||||
*grouping_factor,
|
||||
tfhe::core_crypto::prelude::CiphertextModulus::new_native(),
|
||||
);
|
||||
let multi_bit_bsk_gpu = CudaLweMultiBitBootstrapKey::from_lwe_multi_bit_bootstrap_key(
|
||||
&multi_bit_bsk,
|
||||
&stream,
|
||||
);
|
||||
|
||||
// Allocate a new LweCiphertext and encrypt our plaintext
|
||||
let lwe_ciphertext_in = allocate_and_encrypt_new_lwe_ciphertext(
|
||||
&input_lwe_secret_key,
|
||||
Plaintext(Scalar::ZERO),
|
||||
params.lwe_modular_std_dev.unwrap(),
|
||||
tfhe::core_crypto::prelude::CiphertextModulus::new_native(),
|
||||
&mut encryption_generator,
|
||||
);
|
||||
let lwe_ciphertext_in_gpu =
|
||||
CudaLweCiphertextList::from_lwe_ciphertext(&lwe_ciphertext_in, &stream);
|
||||
|
||||
let accumulator = GlweCiphertext::new(
|
||||
Scalar::ZERO,
|
||||
params.glwe_dimension.unwrap().to_glwe_size(),
|
||||
params.polynomial_size.unwrap(),
|
||||
tfhe::core_crypto::prelude::CiphertextModulus::new_native(),
|
||||
);
|
||||
let accumulator_gpu =
|
||||
CudaGlweCiphertextList::from_glwe_ciphertext(&accumulator, &stream);
|
||||
|
||||
// Allocate the LweCiphertext to store the result of the PBS
|
||||
let mut out_pbs_ct = LweCiphertext::new(
|
||||
Scalar::ZERO,
|
||||
output_lwe_secret_key.lwe_dimension().to_lwe_size(),
|
||||
tfhe::core_crypto::prelude::CiphertextModulus::new_native(),
|
||||
);
|
||||
let mut out_pbs_ct_gpu =
|
||||
CudaLweCiphertextList::from_lwe_ciphertext(&out_pbs_ct, &stream);
|
||||
let h_indexes = &[Scalar::ZERO];
|
||||
stream.synchronize();
|
||||
let mut d_input_indexes = unsafe { stream.malloc_async::<Scalar>(1u32) };
|
||||
let mut d_output_indexes = unsafe { stream.malloc_async::<Scalar>(1u32) };
|
||||
let mut d_lut_indexes = unsafe { stream.malloc_async::<Scalar>(1u32) };
|
||||
unsafe {
|
||||
stream.copy_to_gpu_async(&mut d_input_indexes, h_indexes.as_ref());
|
||||
stream.copy_to_gpu_async(&mut d_output_indexes, h_indexes.as_ref());
|
||||
stream.copy_to_gpu_async(&mut d_input_indexes, h_indexes.as_ref());
|
||||
}
|
||||
stream.synchronize();
|
||||
|
||||
let id = format!("{bench_name}_{name}");
|
||||
bench_group.bench_function(&id, |b| {
|
||||
b.iter(|| {
|
||||
cuda_multi_bit_programmable_bootstrap_lwe_ciphertext(
|
||||
&lwe_ciphertext_in_gpu,
|
||||
&mut out_pbs_ct_gpu,
|
||||
&accumulator_gpu,
|
||||
&d_lut_indexes,
|
||||
&d_output_indexes,
|
||||
&d_input_indexes,
|
||||
&multi_bit_bsk_gpu,
|
||||
&stream,
|
||||
);
|
||||
black_box(&mut out_pbs_ct_gpu);
|
||||
})
|
||||
});
|
||||
|
||||
let bit_size = params.message_modulus.unwrap().ilog2();
|
||||
write_to_json(
|
||||
&id,
|
||||
*params,
|
||||
name,
|
||||
"pbs",
|
||||
&OperatorType::Atomic,
|
||||
bit_size,
|
||||
vec![bit_size],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
name = cuda_pbs_group;
|
||||
config = Criterion::default().sample_size(2000);
|
||||
targets = cuda_pbs::<u64>
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
name = cuda_multi_bit_pbs_group;
|
||||
config = Criterion::default().sample_size(2000);
|
||||
targets = cuda_multi_bit_pbs::<u64>
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
use cuda::{cuda_multi_bit_pbs_group, cuda_pbs_group};
|
||||
|
||||
criterion_group!(
|
||||
name = pbs_group;
|
||||
config = Criterion::default().sample_size(2000);
|
||||
targets = mem_optimized_pbs::<u64>, mem_optimized_pbs::<u32>
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
name = multi_bit_pbs_group;
|
||||
config = Criterion::default().sample_size(2000);
|
||||
targets = multi_bit_pbs::<u64>,
|
||||
multi_bit_pbs::<u32>,
|
||||
multi_bit_deterministic_pbs::<u64>,
|
||||
multi_bit_deterministic_pbs::<u32>,
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
name = pbs_throughput_group;
|
||||
config = Criterion::default().sample_size(100);
|
||||
targets = pbs_throughput::<u64>, pbs_throughput::<u32>
|
||||
);
|
||||
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
criterion_main!(pbs_group, multi_bit_pbs_group, pbs_throughput_group);
|
||||
#[cfg(feature = "gpu")]
|
||||
criterion_main!(cuda_pbs_group, cuda_multi_bit_pbs_group);
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#[path = "../utilities.rs"]
|
||||
mod utilities;
|
||||
|
||||
use crate::utilities::{write_to_json, OperatorType};
|
||||
use crate::utilities::{write_to_json, EnvConfig, OperatorType};
|
||||
use std::env;
|
||||
|
||||
use criterion::{criterion_group, Criterion};
|
||||
@@ -11,8 +11,9 @@ use itertools::iproduct;
|
||||
use rand::prelude::*;
|
||||
use rand::Rng;
|
||||
use std::vec::IntoIter;
|
||||
use tfhe::core_crypto::algorithms::misc::divide_ceil;
|
||||
use tfhe::integer::keycache::KEY_CACHE;
|
||||
use tfhe::integer::{IntegerKeyKind, RadixCiphertext, ServerKey};
|
||||
use tfhe::integer::{IntegerKeyKind, RadixCiphertext, RadixClientKey, ServerKey};
|
||||
use tfhe::keycache::NamedParam;
|
||||
|
||||
use tfhe::integer::U256;
|
||||
@@ -28,9 +29,6 @@ use tfhe::shortint::parameters::{
|
||||
/// It must be as big as the largest bit size tested
|
||||
type ScalarType = U256;
|
||||
|
||||
const FAST_BENCH_BIT_SIZES: [usize; 1] = [32];
|
||||
const BENCH_BIT_SIZES: [usize; 7] = [8, 16, 32, 40, 64, 128, 256];
|
||||
|
||||
fn gen_random_u256(rng: &mut ThreadRng) -> U256 {
|
||||
let clearlow = rng.gen::<u128>();
|
||||
let clearhigh = rng.gen::<u128>();
|
||||
@@ -48,37 +46,15 @@ struct ParamsAndNumBlocksIter {
|
||||
|
||||
impl Default for ParamsAndNumBlocksIter {
|
||||
fn default() -> Self {
|
||||
let is_multi_bit = match env::var("__TFHE_RS_BENCH_TYPE") {
|
||||
Ok(val) => val.to_lowercase() == "multi_bit",
|
||||
Err(_) => false,
|
||||
};
|
||||
let env_config = EnvConfig::new();
|
||||
|
||||
let is_fast_bench = match env::var("__TFHE_RS_FAST_BENCH") {
|
||||
Ok(val) => val.to_lowercase() == "true",
|
||||
Err(_) => false,
|
||||
};
|
||||
|
||||
let bit_sizes = if is_fast_bench {
|
||||
FAST_BENCH_BIT_SIZES.to_vec()
|
||||
} else {
|
||||
BENCH_BIT_SIZES.to_vec()
|
||||
};
|
||||
|
||||
if is_multi_bit {
|
||||
if env_config.is_multi_bit {
|
||||
#[cfg(feature = "gpu")]
|
||||
let params = vec![PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS.into()];
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
let params = vec![PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS.into()];
|
||||
|
||||
let bit_sizes = if is_fast_bench {
|
||||
vec![32]
|
||||
} else if cfg!(feature = "gpu") {
|
||||
BENCH_BIT_SIZES.to_vec()
|
||||
} else {
|
||||
vec![8, 16, 32, 40, 64]
|
||||
};
|
||||
|
||||
let params_and_bit_sizes = iproduct!(params, bit_sizes);
|
||||
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());
|
||||
Self {
|
||||
params_and_bit_sizes,
|
||||
}
|
||||
@@ -91,7 +67,7 @@ impl Default for ParamsAndNumBlocksIter {
|
||||
// PARAM_MESSAGE_4_CARRY_4_KS_PBS.into(),
|
||||
];
|
||||
|
||||
let params_and_bit_sizes = iproduct!(params, bit_sizes);
|
||||
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());
|
||||
Self {
|
||||
params_and_bit_sizes,
|
||||
}
|
||||
@@ -566,6 +542,65 @@ fn if_then_else_parallelized(c: &mut Criterion) {
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
fn ciphertexts_sum_parallelized(c: &mut Criterion) {
|
||||
let bench_name = "integer::sum_ciphertexts_parallelized";
|
||||
let display_name = "sum_ctxts";
|
||||
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(60));
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() {
|
||||
let param_name = param.name();
|
||||
let max_for_bit_size = ScalarType::MAX >> (ScalarType::BITS as usize - bit_size);
|
||||
|
||||
for len in [5, 10, 20] {
|
||||
let bench_id = format!("{bench_name}_{len}_ctxts::{param_name}::{bit_size}_bits");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
let nb_ctxt = divide_ceil(bit_size, param.message_modulus().0.ilog2() as usize);
|
||||
let cks = RadixClientKey::from((cks, nb_ctxt));
|
||||
|
||||
let encrypt_values = || {
|
||||
let clears = (0..len)
|
||||
.map(|_| gen_random_u256(&mut rng) & max_for_bit_size)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// encryption of integers
|
||||
let ctxts = clears
|
||||
.iter()
|
||||
.copied()
|
||||
.map(|clear| cks.encrypt(clear))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
ctxts
|
||||
};
|
||||
|
||||
b.iter_batched(
|
||||
encrypt_values,
|
||||
|ctxts| sks.sum_ciphertexts_parallelized(&ctxts),
|
||||
criterion::BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
param,
|
||||
param.name(),
|
||||
display_name,
|
||||
&OperatorType::Atomic,
|
||||
bit_size as u32,
|
||||
vec![param.message_modulus().0.ilog2(); num_block],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
macro_rules! define_server_key_bench_unary_fn (
|
||||
(method_name: $server_key_method:ident, display_name:$name:ident) => {
|
||||
fn $server_key_method(c: &mut Criterion) {
|
||||
@@ -1036,12 +1071,6 @@ define_server_key_bench_unary_default_fn!(method_name: abs_parallelized, display
|
||||
|
||||
define_server_key_bench_unary_default_fn!(method_name: unchecked_abs_parallelized, display_name: abs);
|
||||
|
||||
define_server_key_bench_unary_fn!(method_name: full_propagate, display_name: carry_propagation);
|
||||
define_server_key_bench_unary_fn!(
|
||||
method_name: full_propagate_parallelized,
|
||||
display_name: carry_propagation
|
||||
);
|
||||
|
||||
define_server_key_bench_default_fn!(method_name: unchecked_max, display_name: max);
|
||||
define_server_key_bench_default_fn!(method_name: unchecked_min, display_name: min);
|
||||
define_server_key_bench_default_fn!(method_name: unchecked_eq, display_name: equal);
|
||||
@@ -1927,6 +1956,7 @@ criterion_group!(
|
||||
right_shift_parallelized,
|
||||
rotate_left_parallelized,
|
||||
rotate_right_parallelized,
|
||||
ciphertexts_sum_parallelized,
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
@@ -2086,6 +2116,92 @@ criterion_group!(
|
||||
unchecked_scalar_ge_parallelized,
|
||||
);
|
||||
|
||||
//================================================================================
|
||||
// Miscellaneous Benches
|
||||
//================================================================================
|
||||
|
||||
fn bench_server_key_cast_function<F>(
|
||||
c: &mut Criterion,
|
||||
bench_name: &str,
|
||||
display_name: &str,
|
||||
cast_op: F,
|
||||
) where
|
||||
F: Fn(&ServerKey, RadixCiphertext, usize),
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let env_config = EnvConfig::new();
|
||||
|
||||
for (param, num_blocks, bit_size) in ParamsAndNumBlocksIter::default() {
|
||||
let all_num_blocks = env_config
|
||||
.bit_sizes()
|
||||
.iter()
|
||||
.copied()
|
||||
.map(|bit| divide_ceil(bit, param.message_modulus().0.ilog2() as usize))
|
||||
.collect::<Vec<_>>();
|
||||
let param_name = param.name();
|
||||
|
||||
for target_num_blocks in all_num_blocks.iter().copied() {
|
||||
let target_bit_size = target_num_blocks * param.message_modulus().0.ilog2() as usize;
|
||||
let bench_id = format!("{bench_name}::{param_name}::{bit_size}_to_{target_bit_size}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
let encrypt_one_value = || cks.encrypt_radix(gen_random_u256(&mut rng), num_blocks);
|
||||
|
||||
b.iter_batched(
|
||||
encrypt_one_value,
|
||||
|ct| {
|
||||
cast_op(&sks, ct, target_num_blocks);
|
||||
},
|
||||
criterion::BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
param,
|
||||
param.name(),
|
||||
display_name,
|
||||
&OperatorType::Atomic,
|
||||
bit_size as u32,
|
||||
vec![param.message_modulus().0.ilog2(); num_blocks],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
/// Generates a Criterion benchmark function named after a `ServerKey` cast method.
///
/// The generated function forwards to `bench_server_key_cast_function`, using
/// `"integer::<method>"` as the group name and `display_name` as the reported operation name.
macro_rules! define_server_key_bench_cast_fn {
    (method_name: $server_key_method:ident, display_name: $name:ident) => {
        fn $server_key_method(c: &mut Criterion) {
            bench_server_key_cast_function(
                c,
                concat!("integer::", stringify!($server_key_method)),
                stringify!($name),
                |sks, ct, target_num_blocks| {
                    sks.$server_key_method(ct, target_num_blocks);
                },
            )
        }
    };
}
|
||||
|
||||
define_server_key_bench_cast_fn!(method_name: cast_to_unsigned, display_name: cast_to_unsigned);
|
||||
define_server_key_bench_cast_fn!(method_name: cast_to_signed, display_name: cast_to_signed);
|
||||
|
||||
criterion_group!(cast_ops, cast_to_unsigned, cast_to_signed);
|
||||
|
||||
define_server_key_bench_unary_fn!(method_name: full_propagate, display_name: carry_propagation);
|
||||
define_server_key_bench_unary_fn!(
|
||||
method_name: full_propagate_parallelized,
|
||||
display_name: carry_propagation
|
||||
);
|
||||
|
||||
criterion_group!(misc, full_propagate, full_propagate_parallelized);
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
@@ -2109,7 +2225,8 @@ fn go_through_cpu_bench_groups(val: &str) {
|
||||
default_parallelized_ops();
|
||||
default_parallelized_ops_comp();
|
||||
default_scalar_parallelized_ops();
|
||||
default_scalar_parallelized_ops_comp()
|
||||
default_scalar_parallelized_ops_comp();
|
||||
cast_ops()
|
||||
}
|
||||
"smart" => {
|
||||
smart_ops();
|
||||
@@ -2143,7 +2260,8 @@ fn main() {
|
||||
default_parallelized_ops();
|
||||
default_parallelized_ops_comp();
|
||||
default_scalar_parallelized_ops();
|
||||
default_scalar_parallelized_ops_comp()
|
||||
default_scalar_parallelized_ops_comp();
|
||||
cast_ops()
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#[path = "../utilities.rs"]
|
||||
mod utilities;
|
||||
|
||||
use crate::utilities::{write_to_json, OperatorType};
|
||||
use crate::utilities::{write_to_json, EnvConfig, OperatorType};
|
||||
use std::env;
|
||||
|
||||
use criterion::{criterion_group, Criterion};
|
||||
@@ -9,6 +9,7 @@ use itertools::iproduct;
|
||||
use rand::prelude::*;
|
||||
use rand::Rng;
|
||||
use std::vec::IntoIter;
|
||||
use tfhe::core_crypto::algorithms::misc::divide_ceil;
|
||||
use tfhe::integer::keycache::KEY_CACHE;
|
||||
use tfhe::integer::{IntegerKeyKind, RadixCiphertext, ServerKey, SignedRadixCiphertext, I256};
|
||||
use tfhe::keycache::NamedParam;
|
||||
@@ -34,26 +35,12 @@ struct ParamsAndNumBlocksIter {
|
||||
|
||||
impl Default for ParamsAndNumBlocksIter {
|
||||
fn default() -> Self {
|
||||
let is_multi_bit = match env::var("__TFHE_RS_BENCH_TYPE") {
|
||||
Ok(val) => val.to_lowercase() == "multi_bit",
|
||||
Err(_) => false,
|
||||
};
|
||||
let env_config = EnvConfig::new();
|
||||
|
||||
let is_fast_bench = match env::var("__TFHE_RS_FAST_BENCH") {
|
||||
Ok(val) => val.to_lowercase() == "true",
|
||||
Err(_) => false,
|
||||
};
|
||||
|
||||
if is_multi_bit {
|
||||
if env_config.is_multi_bit {
|
||||
let params = vec![PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS.into()];
|
||||
|
||||
let bit_sizes = if is_fast_bench {
|
||||
vec![32]
|
||||
} else {
|
||||
vec![8, 16, 32, 40, 64]
|
||||
};
|
||||
|
||||
let params_and_bit_sizes = iproduct!(params, bit_sizes);
|
||||
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());
|
||||
Self {
|
||||
params_and_bit_sizes,
|
||||
}
|
||||
@@ -66,13 +53,7 @@ impl Default for ParamsAndNumBlocksIter {
|
||||
// PARAM_MESSAGE_4_CARRY_4_KS_PBS.into(),
|
||||
];
|
||||
|
||||
let bit_sizes = if is_fast_bench {
|
||||
vec![32]
|
||||
} else {
|
||||
vec![8, 16, 32, 40, 64, 128, 256]
|
||||
};
|
||||
|
||||
let params_and_bit_sizes = iproduct!(params, bit_sizes);
|
||||
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());
|
||||
Self {
|
||||
params_and_bit_sizes,
|
||||
}
|
||||
@@ -1125,6 +1106,83 @@ criterion_group!(
|
||||
unchecked_scalar_min_parallelized,
|
||||
);
|
||||
|
||||
fn bench_server_key_signed_cast_function<F>(
|
||||
c: &mut Criterion,
|
||||
bench_name: &str,
|
||||
display_name: &str,
|
||||
cast_op: F,
|
||||
) where
|
||||
F: Fn(&ServerKey, SignedRadixCiphertext, usize),
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let env_config = EnvConfig::new();
|
||||
|
||||
for (param, num_blocks, bit_size) in ParamsAndNumBlocksIter::default() {
|
||||
let all_num_blocks = env_config
|
||||
.bit_sizes()
|
||||
.iter()
|
||||
.copied()
|
||||
.map(|bit| divide_ceil(bit, param.message_modulus().0.ilog2() as usize))
|
||||
.collect::<Vec<_>>();
|
||||
let param_name = param.name();
|
||||
|
||||
for target_num_blocks in all_num_blocks.iter().copied() {
|
||||
let target_bit_size = target_num_blocks * param.message_modulus().0.ilog2() as usize;
|
||||
let bench_id = format!("{bench_name}::{param_name}::{bit_size}_to_{target_bit_size}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
let encrypt_one_value =
|
||||
|| cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_blocks);
|
||||
|
||||
b.iter_batched(
|
||||
encrypt_one_value,
|
||||
|ct| {
|
||||
cast_op(&sks, ct, target_num_blocks);
|
||||
},
|
||||
criterion::BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
param,
|
||||
param.name(),
|
||||
display_name,
|
||||
&OperatorType::Atomic,
|
||||
bit_size as u32,
|
||||
vec![param.message_modulus().0.ilog2(); num_blocks],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
/// Generates a Criterion benchmark function named after a `ServerKey` cast method applied to a
/// signed input.
///
/// The generated function forwards to `bench_server_key_signed_cast_function`, using
/// `"integer::signed::<method>"` as the group name and `display_name` as the reported operation
/// name.
macro_rules! define_server_key_bench_cast_fn {
    (method_name: $server_key_method:ident, display_name: $name:ident) => {
        fn $server_key_method(c: &mut Criterion) {
            bench_server_key_signed_cast_function(
                c,
                concat!("integer::signed::", stringify!($server_key_method)),
                stringify!($name),
                |sks, ct, target_num_blocks| {
                    sks.$server_key_method(ct, target_num_blocks);
                },
            )
        }
    };
}
|
||||
|
||||
define_server_key_bench_cast_fn!(method_name: cast_to_unsigned, display_name: cast_to_unsigned);
|
||||
define_server_key_bench_cast_fn!(method_name: cast_to_signed, display_name: cast_to_signed);
|
||||
|
||||
criterion_group!(cast_ops, cast_to_unsigned, cast_to_signed);
|
||||
|
||||
fn main() {
|
||||
match env::var("__TFHE_RS_BENCH_OP_FLAVOR") {
|
||||
Ok(val) => {
|
||||
@@ -1133,7 +1191,8 @@ fn main() {
|
||||
default_parallelized_ops();
|
||||
default_parallelized_ops_comp();
|
||||
default_scalar_parallelized_ops();
|
||||
default_scalar_parallelized_ops_comp()
|
||||
default_scalar_parallelized_ops_comp();
|
||||
cast_ops()
|
||||
}
|
||||
"unchecked" => {
|
||||
unchecked_ops();
|
||||
@@ -1147,6 +1206,7 @@ fn main() {
|
||||
Err(_) => {
|
||||
default_parallelized_ops();
|
||||
default_scalar_parallelized_ops();
|
||||
cast_ops()
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use serde::Serialize;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::{env, fs};
|
||||
#[cfg(feature = "boolean")]
|
||||
use tfhe::boolean::parameters::BooleanParameters;
|
||||
use tfhe::core_crypto::prelude::*;
|
||||
@@ -226,6 +226,54 @@ pub fn write_to_json<
|
||||
fs::write(params_directory, serde_json::to_string(&record).unwrap()).unwrap();
|
||||
}
|
||||
|
||||
/// Precisions benchmarked when `__TFHE_RS_FAST_BENCH` is enabled.
const FAST_BENCH_BIT_SIZES: [usize; 1] = [32];
/// Full set of precisions benchmarked by default.
const BENCH_BIT_SIZES: [usize; 7] = [8, 16, 32, 40, 64, 128, 256];

/// User configuration in which benchmarks must be run.
#[derive(Default)]
pub struct EnvConfig {
    /// `true` when `__TFHE_RS_BENCH_TYPE` is set to `multi_bit` (case-insensitive).
    pub is_multi_bit: bool,
    /// `true` when `__TFHE_RS_FAST_BENCH` is set to `true` (case-insensitive).
    pub is_fast_bench: bool,
}

impl EnvConfig {
    /// Returns `true` when environment variable `name` is set and its lowercased value equals
    /// `expected`. An unset or non-unicode variable yields `false`.
    fn env_var_matches(name: &str, expected: &str) -> bool {
        env::var(name).map_or(false, |val| val.to_lowercase() == expected)
    }

    /// Builds the configuration from the `__TFHE_RS_BENCH_TYPE` and `__TFHE_RS_FAST_BENCH`
    /// environment variables.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self {
            is_multi_bit: Self::env_var_matches("__TFHE_RS_BENCH_TYPE", "multi_bit"),
            is_fast_bench: Self::env_var_matches("__TFHE_RS_FAST_BENCH", "true"),
        }
    }

    /// Get precisions values to benchmark.
    ///
    /// Fast benches always use `FAST_BENCH_BIT_SIZES`. Otherwise multi-bit runs without the
    /// `gpu` feature are limited to 64 bits, and every other configuration uses
    /// `BENCH_BIT_SIZES`.
    #[allow(dead_code)]
    pub fn bit_sizes(&self) -> Vec<usize> {
        if self.is_fast_bench {
            FAST_BENCH_BIT_SIZES.to_vec()
        } else if self.is_multi_bit && !cfg!(feature = "gpu") {
            vec![8, 16, 32, 40, 64]
        } else {
            BENCH_BIT_SIZES.to_vec()
        }
    }
}
|
||||
|
||||
// Empty main to please clippy.
|
||||
#[allow(dead_code)]
|
||||
pub fn main() {}
|
||||
|
||||
@@ -55,6 +55,10 @@ fn gen_c_api() {
|
||||
"shortint",
|
||||
#[cfg(feature = "integer")]
|
||||
"integer",
|
||||
#[cfg(feature = "gpu")]
|
||||
"gpu",
|
||||
#[cfg(feature = "forward_compatibility")]
|
||||
"forward_compatibility",
|
||||
];
|
||||
|
||||
let parse_expand_vec = if parse_expand_features_vec.is_empty() {
|
||||
@@ -63,14 +67,16 @@ fn gen_c_api() {
|
||||
vec![package_name.as_str()]
|
||||
};
|
||||
|
||||
cbindgen::Builder::new()
|
||||
let builder = cbindgen::Builder::new()
|
||||
.with_crate(crate_dir.as_path())
|
||||
.with_config(cbindgen::Config::from_file(crate_dir.join("cbindgen.toml")).unwrap())
|
||||
.with_parse_expand(&parse_expand_vec)
|
||||
.with_parse_expand_features(&parse_expand_features_vec)
|
||||
.generate()
|
||||
.unwrap()
|
||||
.write_to_file(output_file);
|
||||
.with_parse_expand_features(&parse_expand_features_vec);
|
||||
|
||||
#[cfg(feature = "forward_compatibility")]
|
||||
let builder = builder.with_include("tfhe-c-api-dynamic-buffer.h");
|
||||
|
||||
builder.generate().unwrap().write_to_file(output_file);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
||||
@@ -7,6 +7,9 @@ if(NOT CARGO_PROFILE)
|
||||
endif()
|
||||
set(TFHE_C_API_RELEASE "${CMAKE_CURRENT_SOURCE_DIR}/../../target/${CARGO_PROFILE}")
|
||||
|
||||
option(WITH_FEATURE_GPU "Enable if tfhe-rs C API was compiled with the 'gpu' feature activated" OFF)
|
||||
option(WITH_FORWARD_COMPATIBILITY "Enable if tfhe-rs C API was compiled with the 'forward_compatibility' feature activated" OFF)
|
||||
|
||||
include_directories(${TFHE_C_API_RELEASE})
|
||||
# This one is to fetch the dynamic buffer header
|
||||
include_directories(${TFHE_C_API_RELEASE}/deps)
|
||||
@@ -22,6 +25,15 @@ if(APPLE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WITH_FEATURE_GPU)
|
||||
find_package(CUDAToolkit 10.0 REQUIRED)
|
||||
find_package(OpenMP REQUIRED)
|
||||
endif()
|
||||
|
||||
if("${WITH_FORWARD_COMPATIBILITY}" STREQUAL "ON")
|
||||
add_definitions(-DWITH_FORWARD_COMPATIBILITY)
|
||||
endif()
|
||||
|
||||
file(GLOB TEST_CASES test_*.c)
|
||||
foreach (testsourcefile ${TEST_CASES})
|
||||
get_filename_component(testname ${testsourcefile} NAME_WLE)
|
||||
@@ -34,6 +46,12 @@ foreach (testsourcefile ${TEST_CASES})
|
||||
)
|
||||
target_include_directories(${testname} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_link_libraries(${testname} LINK_PUBLIC Tfhe TfheDynamicBuffer m pthread dl)
|
||||
|
||||
if (WITH_FEATURE_GPU)
|
||||
target_link_libraries(${testname} LINK_PUBLIC CUDA::cudart -lstdc++ OpenMP::OpenMP_CXX)
|
||||
target_compile_definitions(${testname} PUBLIC -DWITH_FEATURE_GPU)
|
||||
endif()
|
||||
|
||||
if(APPLE)
|
||||
target_link_libraries(${testname} LINK_PUBLIC ${SECURITY_FRAMEWORK})
|
||||
endif()
|
||||
|
||||
115
tfhe/c_api_tests/test_forward_compatibility.c
Normal file
115
tfhe/c_api_tests/test_forward_compatibility.c
Normal file
@@ -0,0 +1,115 @@
|
||||
#include <tfhe.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef WITH_FORWARD_COMPATIBILITY
|
||||
int uint8_format_update(const ClientKey *client_key, const ServerKey *server_key) {
|
||||
int ok;
|
||||
FheUint8 *lhs = NULL;
|
||||
FheUint8 *deserialized_lhs = NULL;
|
||||
FheUint8 *result = NULL;
|
||||
DynamicBuffer value_buffer = {.pointer = NULL, .length = 0, .destructor = NULL};
|
||||
DynamicBuffer conformant_value_buffer = {.pointer = NULL, .length = 0, .destructor = NULL};
|
||||
DynamicBuffer cks_buffer = {.pointer = NULL, .length = 0, .destructor = NULL};
|
||||
DynamicBufferView deser_view = {.pointer = NULL, .length = 0};
|
||||
ClientKey *deserialized_client_key = NULL;
|
||||
DynamicBuffer out_buffer = {.pointer = NULL, .length = 0, .destructor = NULL};
|
||||
|
||||
const uint64_t max_serialization_size = UINT64_C(1) << UINT64_C(20);
|
||||
|
||||
uint8_t lhs_clear = 123;
|
||||
|
||||
ok = client_key_serialize(client_key, &cks_buffer);
|
||||
assert(ok == 0);
|
||||
|
||||
deser_view.pointer = cks_buffer.pointer;
|
||||
deser_view.length = cks_buffer.length;
|
||||
|
||||
ok = client_key_update_serialization_from_0_5_to_0_6(deser_view, &out_buffer);
|
||||
assert(ok == 0);
|
||||
|
||||
destroy_dynamic_buffer(&out_buffer);
|
||||
|
||||
deser_view.pointer = cks_buffer.pointer;
|
||||
deser_view.length = cks_buffer.length;
|
||||
ok = client_key_deserialize(deser_view, &deserialized_client_key);
|
||||
assert(ok == 0);
|
||||
|
||||
ok = fhe_uint8_try_encrypt_with_client_key_u8(lhs_clear, deserialized_client_key, &lhs);
|
||||
assert(ok == 0);
|
||||
|
||||
ok = fhe_uint8_serialize(lhs, &value_buffer);
|
||||
assert(ok == 0);
|
||||
|
||||
deser_view.pointer = value_buffer.pointer;
|
||||
deser_view.length = value_buffer.length;
|
||||
|
||||
ok = fhe_uint8_update_serialization_from_0_5_to_0_6(deser_view, &out_buffer);
|
||||
assert(ok == 0);
|
||||
|
||||
destroy_dynamic_buffer(&out_buffer);
|
||||
|
||||
ok = fhe_uint8_safe_serialize(lhs, &conformant_value_buffer, max_serialization_size);
|
||||
assert(ok == 0);
|
||||
|
||||
deser_view.pointer = conformant_value_buffer.pointer;
|
||||
deser_view.length = conformant_value_buffer.length;
|
||||
|
||||
ok = fhe_uint8_safe_update_serialization_conformant_from_0_5_to_0_6(
|
||||
deser_view, max_serialization_size, server_key, &out_buffer);
|
||||
assert(ok == 0);
|
||||
|
||||
destroy_dynamic_buffer(&out_buffer);
|
||||
|
||||
deser_view.pointer = value_buffer.pointer;
|
||||
deser_view.length = value_buffer.length;
|
||||
ok = fhe_uint8_deserialize(deser_view, &deserialized_lhs);
|
||||
assert(ok == 0);
|
||||
|
||||
uint8_t clear;
|
||||
ok = fhe_uint8_decrypt(deserialized_lhs, deserialized_client_key, &clear);
|
||||
assert(ok == 0);
|
||||
|
||||
assert(clear == lhs_clear);
|
||||
|
||||
destroy_dynamic_buffer(&value_buffer);
|
||||
destroy_dynamic_buffer(&conformant_value_buffer);
|
||||
|
||||
fhe_uint8_destroy(lhs);
|
||||
fhe_uint8_destroy(deserialized_lhs);
|
||||
fhe_uint8_destroy(result);
|
||||
return ok;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Entry point: when built with WITH_FORWARD_COMPATIBILITY, generates a key pair with the default
// configuration and runs the serialization-update checks; otherwise does nothing.
// Returns 0 on success.
int main(void) {
  int ok = 0;

#ifdef WITH_FORWARD_COMPATIBILITY
  {
    ConfigBuilder *builder;
    Config *config;

    ok = config_builder_default(&builder);
    assert(ok == 0);
    ok = config_builder_build(builder, &config);
    assert(ok == 0);

    ClientKey *client_key = NULL;
    ServerKey *server_key = NULL;

    ok = generate_keys(config, &client_key, &server_key);
    assert(ok == 0);

    // Checked like every other step so a failure is reported at its source.
    ok = uint8_format_update(client_key, server_key);
    assert(ok == 0);

    client_key_destroy(client_key);
    server_key_destroy(server_key);
  }
#endif

  return ok;
}
|
||||
123
tfhe/c_api_tests/test_high_level_array.c
Normal file
123
tfhe/c_api_tests/test_high_level_array.c
Normal file
@@ -0,0 +1,123 @@
|
||||
#include "tfhe.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
// Encrypts a string in a FheUint array
|
||||
// No error handling is made, it asserts on all error for demo purposes
|
||||
// Encrypts each of the `str_len` bytes of `str` into its own FheUint8 using `ck`.
//
// Returns a malloc'ed array of `str_len` ciphertext pointers; the caller releases it with
// destroy_fhe_uint8_array. Asserts on every error (demo code).
FheUint8 **encrypt_str(const char *const str, const size_t str_len, const ClientKey *ck) {
  assert(str != NULL && str_len > 0);

  FheUint8 **result = malloc(sizeof(*result) * str_len);
  assert(result != NULL);

  for (size_t i = 0; i < str_len; ++i) {
    // The call is kept outside assert(): with NDEBUG the assert expression is not evaluated,
    // which would skip the encryption and leave result[i] uninitialized.
    const int ok = fhe_uint8_try_encrypt_with_client_key_u8((uint8_t)str[i], ck, &result[i]);
    assert(ok == 0);
    (void)ok; // silence the unused-variable warning when asserts are compiled out
  }
  return result;
}
|
||||
|
||||
// Destroys the `len` ciphertexts stored in `begin`, in index order, then frees the array itself.
void destroy_fhe_uint8_array(FheUint8 **begin, const size_t len) {
  size_t idx = 0;
  while (idx < len) {
    fhe_uint8_destroy(begin[idx]);
    ++idx;
  }
  free(begin);
}
|
||||
|
||||
int main(void) {
|
||||
int ok = 0;
|
||||
ConfigBuilder *builder;
|
||||
Config *config;
|
||||
|
||||
config_builder_default(&builder);
|
||||
config_builder_build(builder, &config);
|
||||
|
||||
ClientKey *client_key = NULL;
|
||||
ServerKey *server_key = NULL;
|
||||
|
||||
ok = generate_keys(config, &client_key, &server_key);
|
||||
assert(ok == 0);
|
||||
|
||||
ok = set_server_key(server_key);
|
||||
assert(ok == 0);
|
||||
|
||||
char const *const sentence = "The quick brown fox jumps over the lazy dog";
|
||||
char const *const pattern_1 = "wn fox ";
|
||||
char const *const pattern_2 = "tfhe-rs";
|
||||
|
||||
size_t sentence_len = strlen(sentence);
|
||||
size_t pattern_1_len = strlen(pattern_1);
|
||||
size_t pattern_2_len = strlen(pattern_2);
|
||||
|
||||
assert(pattern_1_len == pattern_2_len); // We use this later in the tests
|
||||
|
||||
FheUint8 **encrypted_sentence = encrypt_str(sentence, sentence_len, client_key);
|
||||
FheUint8 **encrypted_pattern_1 = encrypt_str(pattern_1, pattern_1_len, client_key);
|
||||
FheUint8 **encrypted_pattern_2 = encrypt_str(pattern_2, pattern_2_len, client_key);
|
||||
|
||||
// Equality
|
||||
{
|
||||
FheBool *result;
|
||||
bool clear_result;
|
||||
|
||||
// This one is trivial as the length are not the same
|
||||
ok = fhe_uint8_array_eq(encrypted_sentence, sentence_len, encrypted_pattern_1, pattern_1_len,
|
||||
&result);
|
||||
assert(ok == 0);
|
||||
ok = fhe_bool_decrypt(result, client_key, &clear_result);
|
||||
assert(ok == 0 && clear_result == false);
|
||||
fhe_bool_destroy(result);
|
||||
|
||||
ok = fhe_uint8_array_eq(encrypted_pattern_2, pattern_2_len, encrypted_pattern_1, pattern_1_len,
|
||||
&result);
|
||||
assert(ok == 0);
|
||||
ok = fhe_bool_decrypt(result, client_key, &clear_result);
|
||||
assert(ok == 0 && clear_result == false);
|
||||
fhe_bool_destroy(result);
|
||||
|
||||
ok = fhe_uint8_array_eq(encrypted_sentence, sentence_len, encrypted_sentence, sentence_len,
|
||||
&result);
|
||||
assert(ok == 0);
|
||||
ok = fhe_bool_decrypt(result, client_key, &clear_result);
|
||||
assert(ok == 0 && clear_result == true);
|
||||
fhe_bool_destroy(result);
|
||||
}
|
||||
|
||||
// contains sub slice
|
||||
{
|
||||
FheBool *result;
|
||||
bool clear_result;
|
||||
|
||||
// This one is trivial as the length are not the same
|
||||
ok = fhe_uint8_array_contains_sub_slice(encrypted_sentence, sentence_len, encrypted_pattern_1,
|
||||
pattern_1_len, &result);
|
||||
assert(ok == 0);
|
||||
ok = fhe_bool_decrypt(result, client_key, &clear_result);
|
||||
assert(ok == 0 && clear_result == true);
|
||||
fhe_bool_destroy(result);
|
||||
|
||||
ok = fhe_uint8_array_contains_sub_slice(encrypted_sentence, sentence_len, encrypted_pattern_2,
|
||||
pattern_2_len, &result);
|
||||
assert(ok == 0);
|
||||
ok = fhe_bool_decrypt(result, client_key, &clear_result);
|
||||
assert(ok == 0 && clear_result == false);
|
||||
fhe_bool_destroy(result);
|
||||
|
||||
ok = fhe_uint8_array_contains_sub_slice(encrypted_sentence, sentence_len, encrypted_sentence,
|
||||
sentence_len, &result);
|
||||
assert(ok == 0);
|
||||
ok = fhe_bool_decrypt(result, client_key, &clear_result);
|
||||
assert(ok == 0 && clear_result == true);
|
||||
fhe_bool_destroy(result);
|
||||
}
|
||||
|
||||
destroy_fhe_uint8_array(encrypted_sentence, sentence_len);
|
||||
destroy_fhe_uint8_array(encrypted_pattern_1, pattern_1_len);
|
||||
destroy_fhe_uint8_array(encrypted_pattern_2, pattern_2_len);
|
||||
|
||||
client_key_destroy(client_key);
|
||||
server_key_destroy(server_key);
|
||||
return 0;
|
||||
}
|
||||
103
tfhe/c_api_tests/test_high_level_integers_cuda.c
Normal file
103
tfhe/c_api_tests/test_high_level_integers_cuda.c
Normal file
@@ -0,0 +1,103 @@
|
||||
#if defined(WITH_FEATURE_GPU)
|
||||
#include <tfhe.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
int uint8_client_key(const ClientKey *client_key) {
|
||||
int ok;
|
||||
FheUint8 *lhs = NULL;
|
||||
FheUint8 *rhs = NULL;
|
||||
FheUint8 *result = NULL;
|
||||
|
||||
uint8_t lhs_clear = 123;
|
||||
uint8_t rhs_clear = 14;
|
||||
|
||||
ok = fhe_uint8_try_encrypt_with_client_key_u8(lhs_clear, client_key, &lhs);
|
||||
assert(ok == 0);
|
||||
|
||||
ok = fhe_uint8_try_encrypt_with_client_key_u8(rhs_clear, client_key, &rhs);
|
||||
assert(ok == 0);
|
||||
|
||||
uint8_t clear;
|
||||
|
||||
// Check addition
|
||||
{
|
||||
ok = fhe_uint8_add(lhs, rhs, &result);
|
||||
assert(ok == 0);
|
||||
|
||||
ok = fhe_uint8_decrypt(result, client_key, &clear);
|
||||
assert(ok == 0);
|
||||
|
||||
assert(clear == (lhs_clear + rhs_clear));
|
||||
}
|
||||
|
||||
// Check sum
|
||||
{
|
||||
FheUint8 *sum_result;
|
||||
const FheUint8 *data[2] = {lhs, rhs};
|
||||
ok = fhe_uint8_sum(data, 2, &sum_result);
|
||||
assert(ok == 0);
|
||||
|
||||
clear = 0;
|
||||
ok = fhe_uint8_decrypt(result, client_key, &clear);
|
||||
assert(ok == 0);
|
||||
|
||||
assert(clear == (lhs_clear + rhs_clear));
|
||||
fhe_uint8_destroy(sum_result);
|
||||
}
|
||||
|
||||
fhe_uint8_destroy(lhs);
|
||||
fhe_uint8_destroy(rhs);
|
||||
fhe_uint8_destroy(result);
|
||||
return ok;
|
||||
}
|
||||
|
||||
|
||||
int main(void) {
|
||||
int ok = 0;
|
||||
{
|
||||
ConfigBuilder *builder;
|
||||
Config *config;
|
||||
|
||||
ok = config_builder_default(&builder);
|
||||
assert(ok == 0);
|
||||
ok = config_builder_build(builder, &config);
|
||||
assert(ok == 0);
|
||||
|
||||
ClientKey *client_key = NULL;
|
||||
CompressedServerKey *compressed_sks = NULL;
|
||||
CudaServerKey *cuda_server_key = NULL;
|
||||
|
||||
ok = client_key_generate(config, &client_key);
|
||||
assert(ok == 0);
|
||||
|
||||
ok = compressed_server_key_new(client_key, &compressed_sks);
|
||||
assert(ok == 0);
|
||||
|
||||
ok = compressed_server_key_decompress_to_gpu(compressed_sks, &cuda_server_key);
|
||||
assert(ok == 0);
|
||||
|
||||
ok = set_cuda_server_key(cuda_server_key);
|
||||
assert(ok == 0);
|
||||
|
||||
uint8_client_key(client_key);
|
||||
|
||||
client_key_destroy(client_key);
|
||||
compressed_server_key_destroy(compressed_sks);
|
||||
cuda_server_key_destroy(cuda_server_key);
|
||||
}
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
||||
#else
|
||||
#include <stdio.h>
|
||||
|
||||
// Fallback entry point used when the C API was built without GPU support: reports it on stdout
// and exits successfully.
int main(void) {
  // puts() appends the trailing newline itself.
  puts("tfhe-rs was not compiled with gpu support");
  return 0;
}
|
||||
#endif
|
||||
@@ -45,7 +45,8 @@ usize_is_size_t = true
|
||||
|
||||
[defines]
|
||||
# "target_os = freebsd" = "DEFINE_FREEBSD"
|
||||
# "feature = serde" = "DEFINE_SERDE"
|
||||
"feature = gpu" = "WITH_FEATURE_GPU"
|
||||
"feature = forward_compatibility" = "WITH_FORWARD_COMPATIBILITY"
|
||||
|
||||
|
||||
[export]
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
* [Use Parallelized PBS](how_to/parallelized_pbs.md)
|
||||
* [Use the C API](how_to/c_api.md)
|
||||
* [Use the JS on WASM API](how_to/js_on_wasm_api.md)
|
||||
* [Use multi-threading using the rayon crate](how_to/rayon_crate.md)
|
||||
* [Debug](how_to/debug.md)
|
||||
|
||||
## Fine-grained APIs
|
||||
* [Quick Start](fine_grained_api/quick_start.md)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Quick Start
|
||||
|
||||
The `core_crypto` module from `TFHE-rs` is dedicated to the implementation of the cryptographic tools related to TFHE. To construct an FHE application, the [shortint](../fine_grained_api/shortint/tutorial.md) and/or [Boolean](../fine_grained_api/Boolean/tutorial.md) modules (based on `core_crypto`) are recommended.
|
||||
The `core_crypto` module from `TFHE-rs` is dedicated to the implementation of the cryptographic tools related to TFHE. To construct an FHE application, the [shortint](../fine_grained_api/shortint/readme.md) and/or [Boolean](../fine_grained_api/Boolean/readme.md) modules (based on `core_crypto`) are recommended.
|
||||
|
||||
The `core_crypto` module offers an API to low-level cryptographic primitives and objects, like `lwe_encryption` or `rlwe_ciphertext`. The goal is to propose an easy-to-use API for cryptographers.
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ Welcome to this tutorial about `TFHE-rs` `core_crypto` module.
|
||||
To use `TFHE-rs`, it first has to be added as a dependency in the `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
tfhe = { version = "0.5.0", features = [ "x86_64-unix" ] }
|
||||
tfhe = { version = "0.5.5", features = [ "x86_64-unix" ] }
|
||||
```
|
||||
|
||||
This enables the `x86_64-unix` feature to have efficient implementations of various algorithms for `x86_64` CPUs on a Unix-like system. The 'unix' suffix indicates that the `UnixSeeder`, which uses `/dev/random` to generate random numbers, is activated as a fallback if no hardware number generator is available (like `rdseed` on `x86_64` or if the [`Randomization Services`](https://developer.apple.com/documentation/security/1399291-secrandomcopybytes?language=objc) on Apple platforms are not available). To avoid having the `UnixSeeder` as a potential fallback or to run on non-Unix systems (e.g., Windows), the `x86_64` feature is sufficient.
|
||||
@@ -19,19 +19,19 @@ For Apple Silicon, the `aarch64-unix` or `aarch64` feature should be enabled. `a
|
||||
In short: For `x86_64`-based machines running Unix-like OSes:
|
||||
|
||||
```toml
|
||||
tfhe = { version = "0.5.0", features = ["x86_64-unix"] }
|
||||
tfhe = { version = "0.5.5", features = ["x86_64-unix"] }
|
||||
```
|
||||
|
||||
For Apple Silicon or aarch64-based machines running Unix-like OSes:
|
||||
|
||||
```toml
|
||||
tfhe = { version = "0.5.0", features = ["aarch64-unix"] }
|
||||
tfhe = { version = "0.5.5", features = ["aarch64-unix"] }
|
||||
```
|
||||
|
||||
For `x86_64`-based machines with the [`rdseed instruction`](https://en.wikipedia.org/wiki/RDRAND) running Windows:
|
||||
|
||||
```toml
|
||||
tfhe = { version = "0.5.0", features = ["x86_64"] }
|
||||
tfhe = { version = "0.5.5", features = ["x86_64"] }
|
||||
```
|
||||
|
||||
### Commented code to double a 2-bit message in a leveled fashion and using a PBS with the `core_crypto` module.
|
||||
|
||||
@@ -3,26 +3,42 @@
|
||||
Due to their nature, homomorphic operations are naturally slower than their cleartext equivalents. Some timings are exposed for basic operations. For completeness, benchmarks for other libraries are also given.
|
||||
|
||||
{% hint style="info" %}
|
||||
All benchmarks were launched on an AWS m6i.metal with the following specifications: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz and 512GB of RAM.
|
||||
All benchmarks were launched on an AWS hpc7a.96xlarge instance with the following specifications: AMD EPYC 9R14 CPU @ 2.60GHz and 740GB of RAM.
|
||||
{% endhint %}
|
||||
|
||||
## Integer
|
||||
|
||||
This measures the execution time for some operation sets of tfhe-rs::integer (the unsigned version). Note that the timings for `FheInt` (i.e., the signed integers) are similar.
|
||||
|
||||
The table below reports the timing when the inputs of the benchmarked operation are encrypted.
|
||||
|
||||
| Operation \ Size | `FheUint8` | `FheUint16` | `FheUint32` | `FheUint64` | `FheUint128` | `FheUint256` |
|
||||
|--------------------------------------------------------|------------|-------------|-------------|-------------|--------------|--------------|
|
||||
| Negation (`-`) | 70.9 ms | 99.3 ms | 129 ms | 180 ms | 239 ms | 333 ms |
|
||||
| Add / Sub (`+`,`-`) | 70.5 ms | 100 ms | 132 ms | 186 ms | 249 ms | 334 ms |
|
||||
| Mul (`x`) | 144 ms | 216 ms | 333 ms | 832 ms | 2.50 s | 8.85 s |
|
||||
| Equal / Not Equal (`eq`, `ne`) | 36.1 ms | 36.5 ms | 57.4 ms | 64.2 ms | 67.3 ms | 78.1 ms |
|
||||
| Comparisons (`ge`, `gt`, `le`, `lt`) | 52.6 ms | 73.1 ms | 98.8 ms | 124 ms | 165 ms | 201 ms |
|
||||
| Max / Min (`max`,`min`) | 76.2 ms | 102 ms | 135 ms | 171 ms | 212 ms | 301 ms |
|
||||
| Bitwise operations (`&`, `\|`, `^`) | 19.4 ms | 20.3 ms | 21.0 ms | 27.2 ms | 31.6 ms | 40.2 ms |
|
||||
| Div / Rem (`/`, `%`) | 729 ms | 1.93 s | 4.81 s | 12.2 s | 30.7 s | 89.6 s |
|
||||
| Left / Right Shifts (`<<`, `>>`) | 99.4 ms | 129 ms | 180 ms | 243 ms | 372 ms | 762 ms |
|
||||
| Left / Right Rotations (`left_rotate`, `right_rotate`) | 103 ms | 128 ms | 182 ms | 241 ms | 374 ms | 763 ms |
|
||||
| Negation (`-`) | 55.4 ms | 79.7 ms | 105 ms | 133 ms | 163 ms | 199 ms |
|
||||
| Add / Sub (`+`,`-`) | 58.9 ms | 86.0 ms | 106 ms | 124 ms | 151 ms | 193 ms |
|
||||
| Mul (`x`) | 122 ms | 164 ms | 227 ms | 410 ms | 1.04 s | 3.41 s |
|
||||
| Equal / Not Equal (`eq`, `ne`) | 32.0 ms | 32.0 ms | 50.4 ms | 50.9 ms | 53.1 ms | 54.6 ms |
|
||||
| Comparisons (`ge`, `gt`, `le`, `lt`) | 43.7 ms | 65.2 ms | 84.3 ms | 107 ms | 132 ms | 159 ms |
|
||||
| Max / Min (`max`,`min`) | 68.4 ms | 86.8 ms | 106 ms | 132 ms | 160 ms | 200 ms |
|
||||
| Bitwise operations (`&`, `\|`, `^`) | 17.1 ms | 17.3 ms | 17.8 ms | 18.8 ms | 20.2 ms | 22.2 ms |
|
||||
| Div / Rem (`/`, `%`) | 631 ms | 1.59 s | 3.77 s | 8.64 s | 20.3 s | 53.4 s |
|
||||
| Left / Right Shifts (`<<`, `>>`) | 82.8 ms | 99.2 ms | 121 ms | 149 ms | 194 ms | 401 ms |
|
||||
| Left / Right Rotations (`left_rotate`, `right_rotate`) | 82.1 ms | 99.4 ms | 120 ms | 149 ms | 194 ms | 402 ms |
|
||||
|
||||
The table below reports the timing when the left input of the benchmarked operation is encrypted and the other is a clear scalar of the same size.
|
||||
|
||||
| Operation \ Size | `FheUint8` | `FheUint16` | `FheUint32` | `FheUint64` | `FheUint128` | `FheUint256` |
|
||||
|--------------------------------------------------------|------------|-------------|-------------|-------------|--------------|--------------|
|
||||
| Add / Sub (`+`,`-`) | 68.3 ms | 82.4 ms | 102 ms | 122 ms | 151 ms | 191 ms |
|
||||
| Mul (`x`) | 93.7 ms | 139 ms | 178 ms | 242 ms | 516 ms | 1.02 s |
|
||||
| Equal / Not Equal (`eq`, `ne`) | 30.2 ms | 30.8 ms | 32.7 ms | 50.4 ms | 51.2 ms | 54.8 ms |
|
||||
| Comparisons (`ge`, `gt`, `le`, `lt`) | 47.3 ms | 69.9 ms | 96.3 ms | 102 ms | 138 ms | 141 ms |
|
||||
| Max / Min (`max`,`min`) | 75.4 ms | 99.7 ms | 120 ms | 126 ms | 150 ms | 186 ms |
|
||||
| Bitwise operations (`&`, `\|`, `^`) | 17.1 ms | 17.4 ms | 18.2 ms | 19.2 ms | 19.7 ms | 22.6 ms |
|
||||
| Div (`/`) | 160 ms | 212 ms | 272 ms | 402 ms | 796 ms | 2.27 s |
|
||||
| Rem (`%`) | 315 ms | 428 ms | 556 ms | 767 ms | 1.27 s | 2.86 s |
|
||||
| Left / Right Shifts (`<<`, `>>`) | 16.8 ms | 16.8 ms | 17.3 ms | 18.0 ms | 18.9 ms | 22.6 ms |
|
||||
| Left / Right Rotations (`left_rotate`, `right_rotate`) | 16.8 ms | 16.9 ms | 17.3 ms | 18.3 ms | 19.0 ms | 22.8 ms |
|
||||
|
||||
All timings are related to parallelized Radix-based integer operations, where each block is encrypted using the default parameters (i.e., PARAM\_MESSAGE\_2\_CARRY\_2\_KS\_PBS, more information about parameters can be found [here](../fine_grained_api/shortint/parameters.md)).
|
||||
To ensure predictable timings, the operation flavor is the `default` one: the carry is propagated if needed. The operation costs may be reduced by using `unchecked`, `checked`, or `smart`.
|
||||
@@ -36,10 +52,10 @@ This uses the Concrete FFT + AVX-512 configuration.
|
||||
|
||||
| Parameter set | PARAM\_MESSAGE\_1\_CARRY\_1 | PARAM\_MESSAGE\_2\_CARRY\_2 | PARAM\_MESSAGE\_3\_CARRY\_3 | PARAM\_MESSAGE\_4\_CARRY\_4 |
|
||||
|------------------------------------|-----------------------------|-----------------------------|-----------------------------|-----------------------------|
|
||||
| unchecked\_add | 348 ns | 413 ns | 2.95 µs | 12.1 µs |
|
||||
| add | 7.59 ms | 17.0 ms | 121 ms | 835 ms |
|
||||
| mul\_lsb | 8.13 ms | 16.8 ms | 121 ms | 827 ms |
|
||||
| keyswitch\_programmable\_bootstrap | 7.28 ms | 16.6 ms | 121 ms | 811 ms |
|
||||
| unchecked\_add | 341 ns | 555 ns | 2.47 µs | 9.77 µs |
|
||||
| add | 5.96 ms | 12.6 ms | 102 ms | 508 ms |
|
||||
| mul\_lsb | 5.99 ms | 12.3 ms | 101 ms | 500 ms |
|
||||
| keyswitch\_programmable\_bootstrap | 6.40 ms | 12.9 ms | 104 ms | 489 ms |
|
||||
|
||||
|
||||
## Boolean
|
||||
@@ -50,20 +66,20 @@ This measures the execution time of a single binary Boolean gate.
|
||||
|
||||
| Parameter set | Concrete FFT + AVX-512 |
|
||||
|------------------------------------------------------|------------------------|
|
||||
| DEFAULT\_PARAMETERS\_KS\_PBS | 9.19 ms |
|
||||
| PARAMETERS\_ERROR\_PROB\_2\_POW\_MINUS\_165\_KS\_PBS | 14.1 ms |
|
||||
| TFHE\_LIB\_PARAMETERS | 10.0 ms |
|
||||
| DEFAULT\_PARAMETERS\_KS\_PBS | 8.49 ms |
|
||||
| PARAMETERS\_ERROR\_PROB\_2\_POW\_MINUS\_165\_KS\_PBS | 13.7 ms |
|
||||
| TFHE\_LIB\_PARAMETERS | 9.90 ms |
|
||||
|
||||
|
||||
### tfhe-lib.
|
||||
|
||||
Using the same m6i.metal machine as the one for tfhe-rs, the timings are:
|
||||
Using the same hpc7a.96xlarge machine as the one for tfhe-rs, the timings are:
|
||||
|
||||
| Parameter set | spqlios-fma |
|
||||
|--------------------------------------------------|-------------|
|
||||
| default\_128bit\_gate\_bootstrapping\_parameters | 15.4 ms |
|
||||
| default\_128bit\_gate\_bootstrapping\_parameters | 13.5 ms |
|
||||
|
||||
### OpenFHE (v1.1.1).
|
||||
### OpenFHE (v1.1.2).
|
||||
|
||||
Following the official instructions from OpenFHE, `clang14` and the following command are used to setup the project:
|
||||
`cmake -DNATIVE_SIZE=32 -DWITH_NATIVEOPT=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DWITH_OPENMP=OFF ..`
|
||||
@@ -80,12 +96,12 @@ hexl -> y
|
||||
scripts/build-openfhe-development-hexl.sh
|
||||
```
|
||||
|
||||
Using the same m6i.metal machine as the one for tfhe-rs, the timings are:
|
||||
Using the same hpc7a.96xlarge machine as the one for tfhe-rs, the timings are:
|
||||
|
||||
| Parameter set | GINX | GINX w/ Intel HEXL |
|
||||
|----------------------------------|---------|--------------------|
|
||||
| FHEW\_BINGATE/STD128\_OR | 40.2 ms | 31.0 ms |
|
||||
| FHEW\_BINGATE/STD128\_LMKCDEY_OR | 38.6 ms | 28.4 ms |
|
||||
| FHEW\_BINGATE/STD128\_OR | 25.5 ms | 21.6 ms |
|
||||
| FHEW\_BINGATE/STD128\_LMKCDEY_OR | 25.4 ms | 19.9 ms |
|
||||
|
||||
|
||||
## How to reproduce TFHE-rs benchmarks
|
||||
|
||||
@@ -8,12 +8,12 @@ To use `TFHE-rs` in your project, you first need to add it as a dependency in yo
|
||||
|
||||
If you are using an `x86` machine:
|
||||
```toml
|
||||
tfhe = { version = "0.5.0", features = [ "boolean", "shortint", "integer", "x86_64-unix" ] }
|
||||
tfhe = { version = "0.5.5", features = [ "boolean", "shortint", "integer", "x86_64-unix" ] }
|
||||
```
|
||||
|
||||
If you are using an `ARM` machine:
|
||||
```toml
|
||||
tfhe = { version = "0.5.0", features = [ "boolean", "shortint", "integer", "aarch64-unix" ] }
|
||||
tfhe = { version = "0.5.5", features = [ "boolean", "shortint", "integer", "aarch64-unix" ] }
|
||||
```
|
||||
|
||||
{% hint style="info" %}
|
||||
|
||||
@@ -44,7 +44,7 @@ fn main() {
|
||||
|
||||
The default configuration for x86 Unix machines:
|
||||
```toml
|
||||
tfhe = { version = "0.5.0", features = ["integer", "x86_64-unix"]}
|
||||
tfhe = { version = "0.5.5", features = ["integer", "x86_64-unix"]}
|
||||
```
|
||||
|
||||
Configuration options for different platforms can be found [here](../getting_started/installation.md). Other rust and homomorphic types features can be found [here](../how_to/rust_configuration.md).
|
||||
|
||||
72
tfhe/docs/how_to/debug.md
Normal file
72
tfhe/docs/how_to/debug.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# Debugging FHE Code
|
||||
|
||||
Since tfhe-rs 0.5, [trivial ciphertexts](./trivial_ciphertext.md) have another application.
|
||||
They can be used to allow debugging via a debugger or print statements, as well as to speed up execution
|
||||
so that you won't have to spend minutes waiting for execution to progress.
|
||||
|
||||
This can greatly improve the pace at which one develops FHE applications.
|
||||
|
||||
{% hint style="warning" %}
|
||||
Keep in mind that trivial ciphertexts are not secure at all, thus an application released/deployed in production
|
||||
must never receive trivial ciphertext from a client.
|
||||
{% endhint %}
|
||||
|
||||
|
||||
## Example
|
||||
|
||||
To use this feature, simply call your circuits/functions with trivially encrypted values (made using `encrypt_trivial`)
|
||||
instead of real encryptions (made using `encrypt`)
|
||||
|
||||
```rust
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::{set_server_key, generate_keys, ConfigBuilder, FheUint128};
|
||||
|
||||
|
||||
fn mul_all(a: &FheUint128, b: &FheUint128, c: &FheUint128) -> FheUint128 {
|
||||
// Use the debug format ('{:?}'), if you don't want to unwrap()
|
||||
// and panic if the value is not a trivial.
|
||||
println!(
|
||||
"a: {:?}, b: {:?}, c: {:?}",
|
||||
a.try_decrypt_trivial::<u128>(),
|
||||
b.try_decrypt_trivial::<u128>(),
|
||||
c.try_decrypt_trivial::<u128>(),
|
||||
);
|
||||
let tmp = a * b;
|
||||
|
||||
println!("a * b = {:?}", tmp.try_decrypt_trivial::<u128>());
|
||||
|
||||
tmp * c
|
||||
}
|
||||
|
||||
|
||||
fn main() {
|
||||
let (cks, sks) = generate_keys(ConfigBuilder::default().build());
|
||||
|
||||
set_server_key(sks);
|
||||
|
||||
let a = FheUint128::encrypt_trivial(1234u128);
|
||||
let b = FheUint128::encrypt_trivial(4567u128);
|
||||
let c = FheUint128::encrypt_trivial(89101112u128);
|
||||
|
||||
// since all inputs are trivially encrypted, this is going to be
|
||||
// much faster
|
||||
let result = mul_all(&a, &b, &c);
|
||||
}
|
||||
```
|
||||
|
||||
This example is going to print:
|
||||
```text
|
||||
a: Ok(1234), b: Ok(4567), c: Ok(89101112)
|
||||
a * b = Ok(5635678)
|
||||
```
|
||||
|
||||
If any input to `mul_all` is not a trivial ciphertext, the computations would be done 100% in FHE, and the program
|
||||
would output:
|
||||
|
||||
```text
|
||||
a: Err(NotTrivialCiphertextError), b: Err(NotTrivialCiphertextError), c: Err(NotTrivialCiphertextError)
|
||||
a * b = Err(NotTrivialCiphertextError)
|
||||
```
|
||||
|
||||
Using trivial encryptions as input, the example runs in **980 ms** on a standard 12-core laptop; using real encryptions
|
||||
it would run in **7.5 seconds** on a 128-core machine.
|
||||
@@ -1,3 +1,66 @@
|
||||
# Migrating Data to TFHE-rs 0.5.0 (This Release)
|
||||
# Managing Data Through Various TFHE-rs Versions
|
||||
|
||||
Forward compatibility code to migrate data from TFHE-rs 0.4 to TFHE-rs 0.5 has been added in a minor release of TFHE-rs 0.4, the documentation about the process can be found [here](https://docs.zama.ai/tfhe-rs/0.4-1/how-to/migrate_data).
|
||||
In what follows, the process to manage data when upgrading the TFHE-rs version (starting from the 0.5.5 release) is given. This page details the methods to make data, which have initially been generated with an older version of TFHE-rs, usable with a newer version.
|
||||
|
||||
## Forward Compatibility Strategy
|
||||
|
||||
The current strategy that has been adopted for TFHE-rs is the following:
|
||||
|
||||
- TFHE-rs has a global `SERIALIZATION_VERSION` constant;
|
||||
- When breaking serialization changes are introduced, this global version is bumped;
|
||||
- Safe serialization primitives check this constant upon deserialization, if the data is incompatible, these primitives return an error.
|
||||
|
||||
To be able to use older serialized data with newer versions, the following is done on new major TFHE-rs releases:
|
||||
|
||||
- A minor update is done to the previously released branch to add the new release as an optional dependency;
|
||||
- Conversion code is added to the previous branch to be able to load old data and convert it to the new data format.
|
||||
|
||||
In practice, if we take the 0.6 release as a concrete example, here is what will happen:
|
||||
|
||||
- 0.6.0 is released with breaking changes to the serialization;
|
||||
- 0.5.5 has tfhe@0.6.0 as optional dependency gated by the `forward_compatibility` feature;
|
||||
- Conversion code is added to 0.5.5, if possible without any user input, but some data migration will likely require some information to be provided by the developer writing the migration code;
|
||||
- 0.5.5 is released.
|
||||
|
||||
{% hint style="info" %}
|
||||
Note that if you do not need forward compatibility 0.5.5 will be equivalent to 0.5.3 from a usability perspective and you can safely update.
|
||||
Note also that the 0.6.0 has no knowledge of previous releases.
|
||||
{% endhint %}
|
||||
|
||||
## What it means from a developer perspective
|
||||
|
||||
A set of generic tooling is given to allow migrating data by using several workflows. The data migration is considered to be an application/protocol layer concern to avoid imposing design choices.
|
||||
|
||||
Examples to migrate data:
|
||||
|
||||
An `Application` uses TFHE-rs 0.5.3 and needs/wants to upgrade to 0.6.0 to benefit from various improvements.
|
||||
|
||||
Example timeline of the data migration or `Bulk Data Migration`:
|
||||
- A new transition version of the `Application` is compiled with the 0.5.5 release of TFHE-rs;
|
||||
- The transition version of the `Application` adds code to read previously stored data, convert it to the proper format for 0.6.0 and save it back to disk;
|
||||
- The service enters a maintenance period (if relevant);
|
||||
- Migration of data from 0.5.5 to 0.6.0 is done with the transition version of the `Application`, note that depending on the volume of data this transition can take a significant amount of time;
|
||||
- The updated version of the `Application` is compiled with the 0.6.0 release of TFHE-rs and put in production;
|
||||
- Service is resumed with the updated `Application` (if relevant).
|
||||
|
||||
The above case is describing a simple use case, where only a single version of data has to be managed. Moreover, the above strategy is not relevant in the case where the data is so large that migrating it in one go is not doable, or if the service cannot suffer any interruption.
|
||||
|
||||
In order to manage more complicated cases, another method called `Migrate On Read` can be used.
|
||||
|
||||
Here is an example timeline where data is migrated only as needed with the `Migrate On Read` approach:
|
||||
- A new version of the `Application` is compiled, it has tfhe@0.5.5 as dependency (the dependency will have to be renamed to avoid conflicts, a possible name is to use the major version like `tfhe_0_5`) and tfhe@0.6.0 which will not be renamed and can be accessed as `tfhe`
|
||||
- Code to manage reading the data is added to the `Application`:
|
||||
- The code determines whether the data was saved with the 0.5 `Application` or the 0.6 `Application`, if the data is already up to date with the 0.6 format it can be loaded right away, if it's in the 0.5 format the `Application` can check if an updated version of the data is already available in the 0.6 format and loads that if it's available, otherwise it converts the data to 0.6, saves the converted data to avoid having to convert it every time it is accessed and continues processing with the 0.6 data
|
||||
|
||||
The above is more complicated to manage as data will be present on disk with several versions, however it allows running the service continuously or near-continuously once the new `Application` is deployed (it will require careful routing or error handling as nodes with outdated `Application` won't be able to process the 0.6 data).
|
||||
|
||||
Also, if required, several versions of TFHE-rs can be "chained" to upgrade very old data to newer formats.
|
||||
The above pattern can be extended to have `tfhe_0_5` (tfhe@0.5.5 renamed), `tfhe_0_6` (tfhe@0.6.0 renamed) and `tfhe` being tfhe@0.7.0, this will require special handling from the developers so that their protocol can handle data from 0.5.5, 0.6.0 and 0.7.0 using all the conversion tooling from the relevant version.
|
||||
|
||||
E.g., if some computation requires data from version 0.5.5 a conversion function could be called `upgrade_data_from_0_5_to_0_7` and do:
|
||||
|
||||
- read data from 0.5.5
|
||||
- convert to 0.6.0 format using `tfhe_0_6`
|
||||
- convert to 0.7.0 format using `tfhe_0_7`
|
||||
- save to disk in 0.7.0 format
|
||||
- process 0.7.0 data with `tfhe` which is tfhe@0.7.0
|
||||
|
||||
195
tfhe/docs/how_to/rayon_crate.md
Normal file
195
tfhe/docs/how_to/rayon_crate.md
Normal file
@@ -0,0 +1,195 @@
|
||||
# Making Rayon And TFHE-RS Work Together
|
||||
|
||||
[rayon](https://crates.io/crates/rayon) is a popular crate to easily write multi-threaded code in Rust.
|
||||
|
||||
It is possible to use rayon to write multi-threaded TFHE-rs code. However due to internal details of `rayon` and
|
||||
`TFHE-rs`, there is some special setup that needs to be done.
|
||||
|
||||
## Single Client Application
|
||||
|
||||
### The Problem
|
||||
|
||||
The high-level API requires calling `set_server_key` on each thread where computations need to be done.
|
||||
So a first attempt at using rayon with `TFHE-rs` might look like this:
|
||||
|
||||
```rust
|
||||
use rayon::prelude::*;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::{ConfigBuilder, set_server_key, FheUint8, generate_keys};
|
||||
|
||||
fn main() {
|
||||
let (cks, sks) = generate_keys(ConfigBuilder::default());
|
||||
|
||||
let xs = [
|
||||
FheUint8::encrypt(1u8, &cks),
|
||||
FheUint8::encrypt(2u8, &cks),
|
||||
];
|
||||
|
||||
let ys = [
|
||||
FheUint8::encrypt(3u8, &cks),
|
||||
FheUint8::encrypt(4u8, &cks),
|
||||
];
|
||||
|
||||
|
||||
// set_server_key in each closure as they might be
|
||||
// running in different threads
|
||||
let (a, b) = rayon::join(
|
||||
|| {
|
||||
set_server_key(sks.clone());
|
||||
&xs[0] + &ys[0]
|
||||
},
|
||||
|| {
|
||||
set_server_key(sks.clone());
|
||||
&xs[1] + &ys[1]
|
||||
}
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
However, due to rayon's work stealing mechanism and TFHE-rs's internals, this may create a `BorrowMutError`.
|
||||
|
||||
|
||||
### Working Example
|
||||
|
||||
The correct way is to call `rayon::broadcast`
|
||||
|
||||
```rust
|
||||
use rayon::prelude::*;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::{ConfigBuilder, set_server_key, FheUint8, generate_keys};
|
||||
|
||||
fn main() {
|
||||
let (cks, sks) = generate_keys(ConfigBuilder::default());
|
||||
|
||||
// set the server key in all of the rayon's threads so that
|
||||
// we won't need to do it later
|
||||
rayon::broadcast(|_| set_server_key(sks.clone()));
|
||||
// Set the server key in the main thread
|
||||
set_server_key(sks);
|
||||
|
||||
let xs = [
|
||||
FheUint8::encrypt(1u8, &cks),
|
||||
FheUint8::encrypt(2u8, &cks),
|
||||
];
|
||||
|
||||
let ys = [
|
||||
FheUint8::encrypt(3u8, &cks),
|
||||
FheUint8::encrypt(4u8, &cks),
|
||||
];
|
||||
|
||||
let (a, b) = rayon::join(
|
||||
|| {
|
||||
&xs[0] + &ys[0]
|
||||
},
|
||||
|| {
|
||||
&xs[1] + &ys[1]
|
||||
}
|
||||
);
|
||||
|
||||
let a: u8 = a.decrypt(&cks);
|
||||
let b: u8 = b.decrypt(&cks);
|
||||
assert_eq!(a, 4u8);
|
||||
assert_eq!(b, 6u8);
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Multi-Client Applications
|
||||
|
||||
If your application needs to operate on data from different clients concurrently, and you want each client to use
|
||||
multiple threads, you will need to create different rayon thread pools
|
||||
|
||||
```rust
|
||||
use rayon::prelude::*;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::{ConfigBuilder, set_server_key, FheUint8, generate_keys};
|
||||
|
||||
fn main() {
|
||||
let (cks1, sks1) = generate_keys(ConfigBuilder::default());
|
||||
let xs1 = [
|
||||
FheUint8::encrypt(1u8, &cks1),
|
||||
FheUint8::encrypt(2u8, &cks1),
|
||||
];
|
||||
|
||||
let ys1 = [
|
||||
FheUint8::encrypt(3u8, &cks1),
|
||||
FheUint8::encrypt(4u8, &cks1),
|
||||
];
|
||||
|
||||
let (cks2, sks2) = generate_keys(ConfigBuilder::default());
|
||||
let xs2 = [
|
||||
FheUint8::encrypt(100u8, &cks2),
|
||||
FheUint8::encrypt(200u8, &cks2),
|
||||
];
|
||||
|
||||
let ys2 = [
|
||||
FheUint8::encrypt(103u8, &cks2),
|
||||
FheUint8::encrypt(204u8, &cks2),
|
||||
];
|
||||
|
||||
let client_1_pool = rayon::ThreadPoolBuilder::new().num_threads(4).build().unwrap();
|
||||
let client_2_pool = rayon::ThreadPoolBuilder::new().num_threads(2).build().unwrap();
|
||||
|
||||
client_1_pool.broadcast(|_| set_server_key(sks1.clone()));
|
||||
client_2_pool.broadcast(|_| set_server_key(sks2.clone()));
|
||||
|
||||
let ((a1, b1), (a2, b2)) = rayon::join(|| {
|
||||
client_1_pool.install(|| {
|
||||
rayon::join(
|
||||
|| {
|
||||
&xs1[0] + &ys1[0]
|
||||
},
|
||||
|| {
|
||||
&xs1[1] + &ys1[1]
|
||||
}
|
||||
)
|
||||
})
|
||||
}, || {
|
||||
client_2_pool.install(|| {
|
||||
rayon::join(
|
||||
|| {
|
||||
&xs2[0] + &ys2[0]
|
||||
},
|
||||
|| {
|
||||
&xs2[1] + &ys2[1]
|
||||
}
|
||||
)
|
||||
})
|
||||
});
|
||||
|
||||
let a1: u8 = a1.decrypt(&cks1);
|
||||
let b1: u8 = b1.decrypt(&cks1);
|
||||
assert_eq!(a1, 4u8);
|
||||
assert_eq!(b1, 6u8);
|
||||
|
||||
let a2: u8 = a2.decrypt(&cks2);
|
||||
let b2: u8 = b2.decrypt(&cks2);
|
||||
assert_eq!(a2, 203u8);
|
||||
assert_eq!(b2, 148u8);
|
||||
}
|
||||
```
|
||||
|
||||
This can be useful if you have some rust `#[test]`
|
||||
|
||||
```Rust
|
||||
// Pseudo code
|
||||
#[test]
|
||||
fn test_1() {
|
||||
let pool = rayon::ThreadPoolBuilder::new().num_threads(4).build().unwrap();
|
||||
pool.broadcast(|_| set_server_key(sks1.clone()));
|
||||
pool.install(|| {
|
||||
let result = call_to_a_multithreaded_function(...);
|
||||
assert_eq!(result, expected_value);
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_2() {
|
||||
let pool = rayon::ThreadPoolBuilder::new().num_threads(4).build().unwrap();
|
||||
pool.broadcast(|_| set_server_key(sks1.clone()));
|
||||
pool.install(|| {
|
||||
let result = call_to_another_multithreaded_function(...);
|
||||
assert_eq!(result, expected_value);
|
||||
})
|
||||
}
|
||||
```
|
||||
@@ -13,12 +13,12 @@ To use the `TFHE-rs GPU backend` in your project, you first need to add it as a
|
||||
|
||||
If you are using an `x86` machine:
|
||||
```toml
|
||||
tfhe = { version = "0.5.0", features = [ "boolean", "shortint", "integer", "x86_64-unix", "gpu" ] }
|
||||
tfhe = { version = "0.5.5", features = [ "boolean", "shortint", "integer", "x86_64-unix", "gpu" ] }
|
||||
```
|
||||
|
||||
If you are using an `ARM` machine:
|
||||
```toml
|
||||
tfhe = { version = "0.5.0", features = [ "boolean", "shortint", "integer", "aarch64-unix", "gpu" ] }
|
||||
tfhe = { version = "0.5.5", features = [ "boolean", "shortint", "integer", "aarch64-unix", "gpu" ] }
|
||||
```
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ TFHE-rs GPU backend is supported on Linux (x86, aarch64).
|
||||
|
||||
# A first example
|
||||
## Configuring and creating keys.
|
||||
In comparison with the [CPU example](../getting_started/quick_start), the only difference lies into the key creation, which is detailed [here](#Setting-the-keys)
|
||||
In comparison with the [CPU example](../getting_started/quick_start.md), the only difference lies in the key creation, which is detailed [here](#Setting-the-keys)
|
||||
|
||||
Here is a full example (combining the client and server parts):
|
||||
|
||||
@@ -80,8 +80,8 @@ fn main() {
|
||||
|
||||
|
||||
## Setting the keys
|
||||
The configuration of the key is different from the CPU. More precisely, if both client and server keys are still generated by the Client (which is assumed to run on a CPU), the server key has then to be decompressed by the Server to be converted into the right format.
|
||||
To do so, the server should run this function: ```decompressed_to_gpu()```.
|
||||
The configuration of the key is different from the CPU. More precisely, if both client and server keys are still generated by the Client (which is assumed to run on a CPU), the server key has then to be decompressed by the Server to be converted into the right format.
|
||||
To do so, the server should run this function: ```decompressed_to_gpu()```.
|
||||
From then on, there is no difference between the CPU and the GPU.
|
||||
|
||||
|
||||
@@ -90,14 +90,14 @@ On the client-side, the method to encrypt the data is exactly the same than the
|
||||
```Rust
|
||||
let clear_a = 27u8;
|
||||
let clear_b = 128u8;
|
||||
|
||||
|
||||
let a = FheUint8::encrypt(clear_a, &client_key);
|
||||
let b = FheUint8::encrypt(clear_b, &client_key);
|
||||
```
|
||||
|
||||
## Computation.
|
||||
The server must first set its keys up, like in the CPU, with: ``` set_server_key(gpu_key);``` .
|
||||
Then, homomorphic computations are done with the same code than the one described [here](../getting_started/operations).
|
||||
Then, homomorphic computations are done with the same code as the one described [here](../getting_started/operations.md).
|
||||
|
||||
```Rust
|
||||
//Server-side
|
||||
@@ -117,13 +117,13 @@ Finally, the client gets the decrypted results by computing:
|
||||
|
||||
```Rust
|
||||
let decrypted_result: u8 = result.decrypt(&client_key);
|
||||
```
|
||||
```
|
||||
## Improving performance.
|
||||
TFHE-rs includes the possibility to leverage the high number of threads given by a GPU.
|
||||
To do so, the configuration should be updated with: `let config = ConfigBuilder::with_custom_parameters(PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS, None).build();`
|
||||
The complete example becomes:
|
||||
|
||||
```rust
|
||||
```rust
|
||||
use tfhe::{ConfigBuilder, set_server_key, FheUint8, ClientKey, CompressedServerKey};
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS;
|
||||
@@ -157,10 +157,10 @@ fn main() {
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
# List of available operations
|
||||
|
||||
The GPU backend includes the following operations:
|
||||
The GPU backend includes the following operations:
|
||||
| name | symbol | `Enc`/`Enc` | `Enc`/ `Int` |
|
||||
|-----------------------|----------------|--------------------------|--------------------------|
|
||||
| Neg | `-` | :heavy_check_mark: | N/A |
|
||||
|
||||
@@ -11,7 +11,7 @@ To serialize our data, a [data format](https://serde.rs/#data-formats) should be
|
||||
|
||||
[dependencies]
|
||||
# ...
|
||||
tfhe = { version = "0.5.0", features = ["integer","x86_64-unix"]}
|
||||
tfhe = { version = "0.5.5", features = ["integer","x86_64-unix"]}
|
||||
bincode = "1.3.3"
|
||||
```
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ To use the `FheUint8` type, the `integer` feature must be activated:
|
||||
|
||||
[dependencies]
|
||||
# Default configuration for x86 Unix machines:
|
||||
tfhe = { version = "0.5.0", features = ["integer", "x86_64-unix"]}
|
||||
tfhe = { version = "0.5.5", features = ["integer", "x86_64-unix"]}
|
||||
```
|
||||
|
||||
Other configurations can be found [here](../getting_started/installation.md).
|
||||
|
||||
@@ -19,7 +19,7 @@ This function returns a Boolean that will be either `true` or `false` so that th
|
||||
# Cargo.toml
|
||||
|
||||
# Default configuration for x86 Unix machines:
|
||||
tfhe = { version = "0.5.0", features = ["integer", "x86_64-unix"]}
|
||||
tfhe = { version = "0.5.5", features = ["integer", "x86_64-unix"]}
|
||||
```
|
||||
|
||||
Other configurations can be found [here](../getting_started/installation.md).
|
||||
|
||||
@@ -442,16 +442,19 @@ fn sha256_fhe_parallel(input: Vec<FheUint32>) -> [FheUint32; 8] {
|
||||
let mut h = hash[7].clone();
|
||||
|
||||
for i in 0..64 {
|
||||
// Please clippy
|
||||
let e_rotations = || {
|
||||
let rotations = par_rotr(&e, [6u32, 11, 25]);
|
||||
&rotations[0] ^ &rotations[1] ^ &rotations[2]
|
||||
};
|
||||
let a_rotations = || {
|
||||
let rotations = par_rotr(&a, [2u32, 13, 22]);
|
||||
&rotations[0] ^ &rotations[1] ^ &rotations[2]
|
||||
};
|
||||
let (s1, ch, s0, maj) = join!(
|
||||
|| {
|
||||
let rotations = par_rotr(&e, [6u32, 11, 25]);
|
||||
&rotations[0] ^ &rotations[1] ^ &rotations[2]
|
||||
},
|
||||
e_rotations,
|
||||
|| (&e & &f) ^ ((&e ^ &all_ones) & &g),
|
||||
|| {
|
||||
let rotations = par_rotr(&a, [2u32, 13, 22]);
|
||||
&rotations[0] ^ &rotations[1] ^ &rotations[2]
|
||||
},
|
||||
a_rotations,
|
||||
|| (&a & &b) ^ (&a & &c) ^ (&b & &c)
|
||||
);
|
||||
|
||||
|
||||
@@ -4,20 +4,18 @@
|
||||
//! underlying `core_crypto` module.
|
||||
|
||||
use crate::boolean::ciphertext::{Ciphertext, CompressedCiphertext};
|
||||
use crate::boolean::parameters::{
|
||||
BooleanKeySwitchingParameters, BooleanParameters, EncryptionKeyChoice,
|
||||
};
|
||||
use crate::boolean::{ClientKey, CompressedPublicKey, PublicKey, PLAINTEXT_FALSE, PLAINTEXT_TRUE};
|
||||
use crate::core_crypto::algorithms::*;
|
||||
use crate::core_crypto::entities::*;
|
||||
use std::cell::RefCell;
|
||||
pub mod bootstrapping;
|
||||
use crate::boolean::engine::bootstrapping::{Bootstrapper, CompressedServerKey, ServerKey};
|
||||
use crate::boolean::parameters::{BooleanKeySwitchingParameters, BooleanParameters};
|
||||
use crate::core_crypto::commons::generators::{
|
||||
DeterministicSeeder, EncryptionRandomGenerator, SecretRandomGenerator,
|
||||
};
|
||||
use crate::core_crypto::commons::math::random::{ActivatedRandomGenerator, Seeder};
|
||||
use crate::core_crypto::commons::parameters::{PBSOrder, *};
|
||||
use crate::core_crypto::commons::parameters::*;
|
||||
use crate::core_crypto::seeders::new_seeder;
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
54
tfhe/src/c_api/high_level_api/array.rs
Normal file
54
tfhe/src/c_api/high_level_api/array.rs
Normal file
@@ -0,0 +1,54 @@
|
||||
use crate::c_api::high_level_api::booleans::FheBool;
|
||||
use crate::c_api::high_level_api::integers::{
|
||||
FheUint10, FheUint12, FheUint128, FheUint14, FheUint16, FheUint2, FheUint256, FheUint32,
|
||||
FheUint4, FheUint6, FheUint64, FheUint8,
|
||||
};
|
||||
|
||||
/// Generates one `extern "C"` entry point per listed wrapper type, named
/// `<type_name in snake_case>_<name>`, which applies `$inner_func` to two arrays of
/// pointers to `$type_name`.
///
/// Each generated function:
/// - validates `result` (non-null, aligned) and resets `*result` to null, like the other
///   constructors of the C API, so a failed call never leaves a stale pointer behind,
/// - views `lhs`/`rhs` as slices of `lhs_len`/`rhs_len` element pointers,
/// - validates every element pointer with `get_ref_checked` and clones the wrapped
///   high-level value,
/// - calls `$inner_func(&cloned_lhs, &cloned_rhs)` and stores the boxed
///   `$output_type_name` in `*result`.
///
/// The whole body runs inside `catch_panic`; its `c_int` is returned to the caller.
///
/// # Safety
///
/// `lhs` and `rhs` must point to at least `lhs_len` / `rhs_len` readable element pointers.
macro_rules! impl_array_fn {
    (
        name: $name:ident,
        inner_func: $inner_func:path,
        output_type_name: $output_type_name:ty,
        type_name: $($type_name:ty),*
        $(,)?
    ) => {
        $( // type_name
            ::paste::paste! {
                #[no_mangle]
                pub unsafe extern "C" fn [<$type_name:snake _ $name>](
                    lhs: *const *mut $type_name,
                    lhs_len: usize,
                    rhs: *const *mut $type_name,
                    rhs_len: usize,
                    result: *mut *mut $output_type_name,
                ) -> ::std::os::raw::c_int {
                    crate::c_api::utils::catch_panic(|| {
                        // Never write through an unchecked output pointer.
                        crate::c_api::utils::check_ptr_is_non_null_and_aligned(result).unwrap();
                        *result = std::ptr::null_mut();

                        let lhs: &[*mut $type_name] = std::slice::from_raw_parts(lhs, lhs_len);
                        let rhs: &[*mut $type_name] = std::slice::from_raw_parts(rhs, rhs_len);

                        // `get_ref_checked` rejects null *and* misaligned element pointers,
                        // whereas `as_ref()` only catches null.
                        let cloned_lhs = lhs
                            .iter()
                            .map(|e: &*mut $type_name| {
                                crate::c_api::utils::get_ref_checked(*e as *const $type_name)
                                    .unwrap()
                                    .0
                                    .clone()
                            })
                            .collect::<Vec<_>>();
                        let cloned_rhs = rhs
                            .iter()
                            .map(|e: &*mut $type_name| {
                                crate::c_api::utils::get_ref_checked(*e as *const $type_name)
                                    .unwrap()
                                    .0
                                    .clone()
                            })
                            .collect::<Vec<_>>();

                        let inner = $inner_func(&cloned_lhs, &cloned_rhs);

                        *result = Box::into_raw(Box::new($output_type_name(inner)));
                    })
                }
            }
        )*
    };
}
|
||||
|
||||
impl_array_fn!(
|
||||
name: array_eq,
|
||||
inner_func: crate::high_level_api::array::fhe_uint_array_eq,
|
||||
output_type_name: FheBool,
|
||||
type_name: FheUint2, FheUint4, FheUint6, FheUint8, FheUint10, FheUint12, FheUint14, FheUint16, FheUint32, FheUint64, FheUint128, FheUint256,
|
||||
);
|
||||
|
||||
impl_array_fn!(
|
||||
name: array_contains_sub_slice,
|
||||
inner_func: crate::high_level_api::array::fhe_uint_array_contains_sub_slice,
|
||||
output_type_name: FheBool,
|
||||
type_name: FheUint2, FheUint4, FheUint6, FheUint8, FheUint10, FheUint12, FheUint14, FheUint16, FheUint32, FheUint64, FheUint128, FheUint256,
|
||||
);
|
||||
@@ -1,15 +1,42 @@
|
||||
use crate::high_level_api::prelude::*;
|
||||
|
||||
use crate::c_api::utils::check_ptr_is_non_null_and_aligned;
|
||||
use std::ops::{BitAnd, BitOr, BitXor, Not};
|
||||
use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not};
|
||||
|
||||
pub struct FheBool(pub(in crate::c_api) crate::high_level_api::FheBool);
|
||||
|
||||
impl_destroy_on_type!(FheBool);
|
||||
impl_clone_on_type!(FheBool);
|
||||
impl_serialize_deserialize_on_type!(FheBool);
|
||||
impl_safe_serialize_on_type!(FheBool);
|
||||
impl_safe_deserialize_conformant_integer!(
|
||||
FheBool,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
|
||||
impl_binary_fn_on_type!(FheBool => bitand, bitor, bitxor);
|
||||
impl_binary_assign_fn_on_type!(FheBool => bitand_assign, bitor_assign, bitxor_assign);
|
||||
impl_unary_fn_on_type!(FheBool => not);
|
||||
impl_comparison_fn_on_type!(
|
||||
lhs_type: FheBool,
|
||||
rhs_type: FheBool,
|
||||
comparison_fn_names: eq, ne,
|
||||
);
|
||||
impl_scalar_binary_fn_on_type!(FheBool, bool =>
|
||||
bitand,
|
||||
bitor,
|
||||
bitxor,
|
||||
);
|
||||
impl_scalar_binary_assign_fn_on_type!(FheBool, bool =>
|
||||
bitand_assign,
|
||||
bitor_assign,
|
||||
bitxor_assign,
|
||||
);
|
||||
impl_scalar_comparison_fn_on_type!(
|
||||
lhs_type: FheBool,
|
||||
clear_type: bool,
|
||||
comparison_fn_names: eq, ne
|
||||
);
|
||||
|
||||
impl_decrypt_on_type!(FheBool, bool);
|
||||
impl_try_decrypt_trivial_on_type!(FheBool, bool);
|
||||
@@ -23,6 +50,12 @@ pub struct CompressedFheBool(crate::high_level_api::CompressedFheBool);
|
||||
impl_destroy_on_type!(CompressedFheBool);
|
||||
impl_clone_on_type!(CompressedFheBool);
|
||||
impl_serialize_deserialize_on_type!(CompressedFheBool);
|
||||
impl_safe_serialize_on_type!(CompressedFheBool);
|
||||
impl_safe_deserialize_conformant_integer!(
|
||||
CompressedFheBool,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_try_encrypt_with_client_key_on_type!(CompressedFheBool{crate::high_level_api::CompressedFheBool}, bool);
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn compressed_fhe_bool_decompress(
|
||||
@@ -42,6 +75,11 @@ pub struct CompactFheBool(crate::high_level_api::CompactFheBool);
|
||||
impl_destroy_on_type!(CompactFheBool);
|
||||
impl_clone_on_type!(CompactFheBool);
|
||||
impl_serialize_deserialize_on_type!(CompactFheBool);
|
||||
impl_safe_serialize_on_type!(CompactFheBool);
|
||||
impl_safe_deserialize_conformant_integer!(
|
||||
CompactFheBool,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_try_encrypt_with_compact_public_key_on_type!(CompactFheBool{crate::high_level_api::CompactFheBool}, bool);
|
||||
|
||||
#[no_mangle]
|
||||
@@ -62,6 +100,8 @@ pub struct CompactFheBoolList(crate::high_level_api::CompactFheBoolList);
|
||||
impl_destroy_on_type!(CompactFheBoolList);
|
||||
impl_clone_on_type!(CompactFheBoolList);
|
||||
impl_serialize_deserialize_on_type!(CompactFheBoolList);
|
||||
impl_safe_serialize_on_type!(CompactFheBoolList);
|
||||
impl_try_encrypt_list_with_compact_public_key_on_type!(CompactFheBoolList{crate::high_level_api::CompactFheBoolList}, bool);
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn compact_fhe_bool_list_len(
|
||||
|
||||
@@ -250,7 +250,7 @@ macro_rules! create_integer_wrapper_type {
|
||||
$(,)?
|
||||
) => {
|
||||
|
||||
pub struct $name($crate::high_level_api::$name);
|
||||
pub struct $name(pub(in $crate::c_api) $crate::high_level_api::$name);
|
||||
|
||||
impl_destroy_on_type!($name);
|
||||
|
||||
@@ -522,6 +522,167 @@ impl_try_encrypt_list_with_compact_public_key_on_type!(CompactFheInt16List{crate
|
||||
impl_try_encrypt_list_with_compact_public_key_on_type!(CompactFheInt32List{crate::high_level_api::CompactFheInt32List}, i32);
|
||||
impl_try_encrypt_list_with_compact_public_key_on_type!(CompactFheInt64List{crate::high_level_api::CompactFheInt64List}, i64);
|
||||
|
||||
#[cfg(feature = "forward_compatibility")]
|
||||
pub mod forward_compatibility {
|
||||
use super::*;
|
||||
|
||||
// FheInt don't have the 10, 12, 14 variants so we define the impl here
|
||||
impl_update_serialization_format_on_type!(FheUint8);
|
||||
impl_update_serialization_format_on_type!(FheUint10);
|
||||
impl_update_serialization_format_on_type!(FheUint12);
|
||||
impl_update_serialization_format_on_type!(FheUint14);
|
||||
impl_update_serialization_format_on_type!(FheUint16);
|
||||
impl_update_serialization_format_on_type!(FheUint32);
|
||||
impl_update_serialization_format_on_type!(FheUint64);
|
||||
impl_update_serialization_format_on_type!(FheUint128);
|
||||
impl_update_serialization_format_on_type!(FheUint256);
|
||||
impl_update_serialization_format_on_type!(CompressedFheUint8);
|
||||
impl_update_serialization_format_on_type!(CompressedFheUint10);
|
||||
impl_update_serialization_format_on_type!(CompressedFheUint12);
|
||||
impl_update_serialization_format_on_type!(CompressedFheUint14);
|
||||
impl_update_serialization_format_on_type!(CompressedFheUint16);
|
||||
impl_update_serialization_format_on_type!(CompressedFheUint32);
|
||||
impl_update_serialization_format_on_type!(CompressedFheUint64);
|
||||
impl_update_serialization_format_on_type!(CompressedFheUint128);
|
||||
impl_update_serialization_format_on_type!(CompressedFheUint256);
|
||||
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheUint8,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheUint10,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheUint12,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheUint14,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheUint16,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheUint32,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheUint64,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheUint128,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheUint256,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheUint8,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheUint10,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheUint12,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheUint14,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheUint16,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheUint32,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheUint64,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheUint128,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheUint256,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
|
||||
// FheInt don't have the 10, 12, 14 variants so we define the impl here
|
||||
impl_update_serialization_format_on_type!(FheInt8);
|
||||
impl_update_serialization_format_on_type!(FheInt16);
|
||||
impl_update_serialization_format_on_type!(FheInt32);
|
||||
impl_update_serialization_format_on_type!(FheInt64);
|
||||
impl_update_serialization_format_on_type!(FheInt128);
|
||||
impl_update_serialization_format_on_type!(FheInt256);
|
||||
impl_update_serialization_format_on_type!(CompressedFheInt8);
|
||||
impl_update_serialization_format_on_type!(CompressedFheInt16);
|
||||
impl_update_serialization_format_on_type!(CompressedFheInt32);
|
||||
impl_update_serialization_format_on_type!(CompressedFheInt64);
|
||||
impl_update_serialization_format_on_type!(CompressedFheInt128);
|
||||
impl_update_serialization_format_on_type!(CompressedFheInt256);
|
||||
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheInt8,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheInt16,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheInt32,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheInt64,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheInt128,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
FheInt256,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheInt8,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheInt16,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheInt32,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheInt64,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheInt128,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
impl_safe_update_serialization_format_conformant_on_type!(
|
||||
CompressedFheInt256,
|
||||
crate::high_level_api::safe_deserialize_conformant
|
||||
);
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn compact_fhe_uint128_list_try_encrypt_with_compact_public_key_u128(
|
||||
input: *const U128,
|
||||
|
||||
@@ -6,18 +6,44 @@ pub struct PublicKey(pub(crate) crate::high_level_api::PublicKey);
|
||||
pub struct CompactPublicKey(pub(crate) crate::high_level_api::CompactPublicKey);
|
||||
pub struct CompressedCompactPublicKey(pub(crate) crate::high_level_api::CompressedCompactPublicKey);
|
||||
pub struct ServerKey(pub(crate) crate::high_level_api::ServerKey);
|
||||
/// Compressed version of the ServerKey
|
||||
///
|
||||
/// Allows to save storage space and transfer time.
|
||||
/// Also, the CompressedServerKey is the key format that allows to select
|
||||
/// the target hardware of the actual ServerKey when decompressing it.
|
||||
pub struct CompressedServerKey(pub(crate) crate::high_level_api::CompressedServerKey);
|
||||
|
||||
/// ServerKey that lives on a Cuda GPU
|
||||
#[cfg(feature = "gpu")]
|
||||
pub struct CudaServerKey(pub(crate) crate::high_level_api::CudaServerKey);
|
||||
|
||||
impl_destroy_on_type!(ClientKey);
|
||||
impl_destroy_on_type!(PublicKey);
|
||||
impl_destroy_on_type!(CompactPublicKey);
|
||||
impl_destroy_on_type!(CompressedCompactPublicKey);
|
||||
impl_destroy_on_type!(ServerKey);
|
||||
impl_destroy_on_type!(CompressedServerKey);
|
||||
#[cfg(feature = "gpu")]
|
||||
impl_destroy_on_type!(CudaServerKey);
|
||||
|
||||
impl_serialize_deserialize_on_type!(ClientKey);
|
||||
impl_serialize_deserialize_on_type!(PublicKey);
|
||||
impl_serialize_deserialize_on_type!(CompactPublicKey);
|
||||
impl_serialize_deserialize_on_type!(CompressedCompactPublicKey);
|
||||
impl_serialize_deserialize_on_type!(ServerKey);
|
||||
impl_serialize_deserialize_on_type!(CompressedServerKey);
|
||||
|
||||
#[cfg(feature = "forward_compatibility")]
|
||||
mod forward_compatibility {
|
||||
use super::*;
|
||||
|
||||
impl_update_serialization_format_on_type!(ClientKey);
|
||||
impl_update_serialization_format_on_type!(PublicKey);
|
||||
impl_update_serialization_format_on_type!(CompactPublicKey);
|
||||
impl_update_serialization_format_on_type!(CompressedCompactPublicKey);
|
||||
impl_update_serialization_format_on_type!(ServerKey);
|
||||
impl_update_serialization_format_on_type!(CompressedServerKey);
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn generate_keys(
|
||||
@@ -51,6 +77,23 @@ pub unsafe extern "C" fn set_server_key(server_key: *const ServerKey) -> c_int {
|
||||
})
|
||||
}
|
||||
|
||||
/// Sets the cuda server key.
///
/// Once a cuda server key is set in a thread, all computations done in
/// that thread will actually happen on the Cuda GPU.
///
/// Does not take ownership of the key: the key is cloned and the clone is
/// handed to `set_server_key`.
#[cfg(feature = "gpu")]
#[no_mangle]
pub unsafe extern "C" fn set_cuda_server_key(server_key: *const CudaServerKey) -> c_int {
    catch_panic(|| {
        // Validate the incoming pointer, then work on a private copy of the wrapped key.
        let key_copy = get_ref_checked(server_key).unwrap().0.clone();

        crate::high_level_api::set_server_key(key_copy);
    })
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn unset_server_key() -> c_int {
|
||||
catch_panic(|| {
|
||||
@@ -58,6 +101,83 @@ pub unsafe extern "C" fn unset_server_key() -> c_int {
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates a new compressed server key
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn compressed_server_key_new(
|
||||
client_key: *const ClientKey,
|
||||
result_server_key: *mut *mut CompressedServerKey,
|
||||
) -> c_int {
|
||||
catch_panic(|| {
|
||||
check_ptr_is_non_null_and_aligned(result_server_key).unwrap();
|
||||
*result_server_key = std::ptr::null_mut();
|
||||
|
||||
let cks = get_ref_checked(client_key).unwrap();
|
||||
|
||||
let sks = crate::high_level_api::CompressedServerKey::new(&cks.0);
|
||||
|
||||
*result_server_key = Box::into_raw(Box::new(CompressedServerKey(sks)));
|
||||
})
|
||||
}
|
||||
|
||||
/// Decompresses the CompressedServerKey to a ServerKey that lives on CPU
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn compressed_server_key_decompress(
|
||||
compressed_server_key: *const CompressedServerKey,
|
||||
result_server_key: *mut *mut ServerKey,
|
||||
) -> c_int {
|
||||
catch_panic(|| {
|
||||
check_ptr_is_non_null_and_aligned(result_server_key).unwrap();
|
||||
*result_server_key = std::ptr::null_mut();
|
||||
|
||||
let c_sks = get_ref_checked(compressed_server_key).unwrap();
|
||||
|
||||
let sks = c_sks.0.clone().decompress();
|
||||
|
||||
*result_server_key = Box::into_raw(Box::new(ServerKey(sks)));
|
||||
})
|
||||
}
|
||||
|
||||
/// Decompresses the CompressedServerKey to a CudaServerKey that lives on GPU
///
/// `*result_server_key` is reset to null first and set to the boxed key on success.
#[cfg(feature = "gpu")]
#[no_mangle]
pub unsafe extern "C" fn compressed_server_key_decompress_to_gpu(
    compressed_server_key: *const CompressedServerKey,
    result_server_key: *mut *mut CudaServerKey,
) -> c_int {
    catch_panic(|| {
        check_ptr_is_non_null_and_aligned(result_server_key).unwrap();
        *result_server_key = std::ptr::null_mut();

        let compressed_ref = get_ref_checked(compressed_server_key).unwrap();
        let gpu_key = compressed_ref.0.decompress_to_gpu();
        let boxed = Box::new(CudaServerKey(gpu_key));

        *result_server_key = Box::into_raw(boxed);
    })
}
|
||||
|
||||
/// Generates a client key with the given config
|
||||
///
|
||||
/// This function takes ownership of the config,
|
||||
/// thus the given config pointer should not be used/freed after.
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn client_key_generate(
|
||||
config: *mut super::config::Config,
|
||||
result_client_key: *mut *mut ClientKey,
|
||||
) -> c_int {
|
||||
catch_panic(|| {
|
||||
check_ptr_is_non_null_and_aligned(result_client_key).unwrap();
|
||||
|
||||
*result_client_key = std::ptr::null_mut();
|
||||
|
||||
let config = Box::from_raw(config);
|
||||
|
||||
let cks = crate::high_level_api::ClientKey::generate(config.0);
|
||||
|
||||
*result_client_key = Box::into_raw(Box::new(ClientKey(cks)));
|
||||
})
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn public_key_new(
|
||||
client_key: *const ClientKey,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#[macro_use]
|
||||
mod utils;
|
||||
mod array;
|
||||
#[cfg(feature = "boolean")]
|
||||
pub mod booleans;
|
||||
pub mod config;
|
||||
|
||||
@@ -582,7 +582,7 @@ macro_rules! impl_scalar_binary_fn_on_type {
|
||||
lhs: *const $wrapper_type,
|
||||
rhs: $scalar_type,
|
||||
result: *mut *mut $wrapper_type,
|
||||
) -> c_int {
|
||||
) -> ::std::os::raw::c_int {
|
||||
$crate::c_api::utils::catch_panic(|| {
|
||||
let lhs = $crate::c_api::utils::get_ref_checked(lhs).unwrap();
|
||||
let rhs = <$scalar_type as $crate::c_api::high_level_api::utils::CApiIntegerType>::to_rust(rhs);
|
||||
@@ -606,7 +606,7 @@ macro_rules! impl_scalar_binary_assign_fn_on_type {
|
||||
pub unsafe extern "C" fn [<$wrapper_type:snake _scalar_ $binary_assign_fn_name>](
|
||||
lhs: *mut $wrapper_type,
|
||||
rhs: $scalar_type,
|
||||
) -> c_int {
|
||||
) -> ::std::os::raw::c_int {
|
||||
$crate::c_api::utils::catch_panic(|| {
|
||||
let lhs = $crate::c_api::utils::get_mut_checked(lhs).unwrap();
|
||||
let rhs = <$scalar_type as $crate::c_api::high_level_api::utils::CApiIntegerType
|
||||
@@ -642,3 +642,68 @@ macro_rules! define_casting_operation(
|
||||
)*
|
||||
}
|
||||
);
|
||||
|
||||
/// Generates `<wrapper_type in snake_case>_update_serialization_from_0_5_to_0_6`.
///
/// The generated function deserializes `buffer_view` with `bincode` into the current
/// `$wrapper_type`, converts the wrapped value into `next_tfhe::$wrapper_type` through
/// `ConvertInto`, re-serializes it with `bincode` and stores the bytes in `*result`.
///
/// `result` is validated (non-null, aligned) before being written, matching the
/// conformant variant of this macro. All failures (`unwrap`s) happen inside `catch_panic`.
#[cfg(feature = "forward_compatibility")]
macro_rules! impl_update_serialization_format_on_type {
    ($wrapper_type:ty) => {
        ::paste::paste! {
            #[no_mangle]
            pub unsafe extern "C" fn [<$wrapper_type:snake _update_serialization_from_0_5_to_0_6>](
                buffer_view: tfhe_c_api_dynamic_buffer::DynamicBufferView,
                result: *mut tfhe_c_api_dynamic_buffer::DynamicBuffer,
            ) -> ::std::os::raw::c_int {
                use crate::forward_compatibility::ConvertInto;
                crate::c_api::utils::catch_panic(|| {
                    // Never write through an unchecked output pointer.
                    crate::c_api::utils::check_ptr_is_non_null_and_aligned(result).unwrap();

                    let object: $wrapper_type = $wrapper_type(
                        bincode::deserialize(buffer_view.as_slice()).unwrap()
                    );

                    let next_object: next_tfhe::$wrapper_type = (object.0).convert_into();

                    let buffer = bincode::serialize(&next_object).unwrap();

                    *result = buffer.into();
                })
            }
        }
    };
}
|
||||
|
||||
/// Generates
/// `<wrapper_type in snake_case>_safe_update_serialization_conformant_from_0_5_to_0_6`.
///
/// The generated function validates `result` and `server_key`, deserializes
/// `buffer_view` by calling `$function_name(bytes, serialized_size_limit, &server_key.0)`,
/// converts the wrapped value into `next_tfhe::$wrapper_type` through `ConvertInto`,
/// serializes it with `bincode` and stores the bytes in `*result`.
/// All failures (`unwrap`s) happen inside `catch_panic`.
#[cfg(feature = "forward_compatibility")]
macro_rules! impl_safe_update_serialization_format_conformant_on_type {
    ($wrapper_type:ty, $function_name:path) => {
        ::paste::paste! {
            #[no_mangle]
            pub unsafe extern "C" fn [<$wrapper_type:snake _safe_update_serialization_conformant_from_0_5_to_0_6>](
                buffer_view: tfhe_c_api_dynamic_buffer::DynamicBufferView,
                serialized_size_limit: u64,
                server_key: *const crate::c_api::high_level_api::keys::ServerKey,
                result: *mut tfhe_c_api_dynamic_buffer::DynamicBuffer,
            ) -> ::std::os::raw::c_int {
                crate::c_api::utils::catch_panic(|| {
                    use crate::forward_compatibility::ConvertInto;

                    // Pointer validation comes first, in the same order as before:
                    // output buffer, then server key.
                    crate::c_api::utils::check_ptr_is_non_null_and_aligned(result).unwrap();
                    let key_ref = crate::c_api::utils::get_ref_checked(server_key).unwrap();

                    let bytes: &[u8] = buffer_view.as_slice();

                    // Conformance-checked deserialization of the current-format object.
                    let current_inner =
                        $function_name(bytes, serialized_size_limit, &key_ref.0).unwrap();
                    let current: $wrapper_type = $wrapper_type(current_inner);

                    let upgraded: next_tfhe::$wrapper_type = (current.0).convert_into();

                    let serialized = bincode::serialize(&upgraded).unwrap();

                    *result = serialized.into();
                })
            }
        }
    };
}
|
||||
|
||||
@@ -94,10 +94,8 @@ pub fn fill_lwe_mask_and_body_for_encryption_native_mod_compatible<
|
||||
// generate an error from the normal distribution described by std_dev
|
||||
let noise = generator.random_noise_custom_mod(noise_parameters, ciphertext_modulus);
|
||||
// compute the multisum between the secret key and the mask
|
||||
let mask_key_dot_product = (*output_body.data).wrapping_add(slice_wrapping_dot_product(
|
||||
output_mask.as_ref(),
|
||||
lwe_secret_key.as_ref(),
|
||||
));
|
||||
let mask_key_dot_product =
|
||||
slice_wrapping_dot_product(output_mask.as_ref(), lwe_secret_key.as_ref());
|
||||
|
||||
// Store sum(ai * si) + delta * m + e in the body
|
||||
*output_body.data = mask_key_dot_product
|
||||
|
||||
@@ -167,24 +167,30 @@ fn lwe_encrypt_decrypt_custom_mod<Scalar: UnsignedTorus>(params: ClassicTestPara
|
||||
|
||||
let plaintext = Plaintext(msg * delta);
|
||||
|
||||
encrypt_lwe_ciphertext(
|
||||
&lwe_sk,
|
||||
&mut ct,
|
||||
plaintext,
|
||||
lwe_modular_std_dev,
|
||||
&mut rsc.encryption_random_generator,
|
||||
);
|
||||
// This may look silly, but this is to catch a regression where the previous content of
|
||||
// the ciphertext was wrongly used during encryption, re-encrypting in a ciphertext
|
||||
// where we already encrypted allows to check the encryption is valid even if the
|
||||
// destination LWE is dirty
|
||||
for _ in 0..2 {
|
||||
encrypt_lwe_ciphertext(
|
||||
&lwe_sk,
|
||||
&mut ct,
|
||||
plaintext,
|
||||
lwe_modular_std_dev,
|
||||
&mut rsc.encryption_random_generator,
|
||||
);
|
||||
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&ct,
|
||||
ciphertext_modulus
|
||||
));
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&ct,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
let decrypted = decrypt_lwe_ciphertext(&lwe_sk, &ct);
|
||||
let decrypted = decrypt_lwe_ciphertext(&lwe_sk, &ct);
|
||||
|
||||
let decoded = round_decode(decrypted.0, delta) % msg_modulus;
|
||||
let decoded = round_decode(decrypted.0, delta) % msg_modulus;
|
||||
|
||||
assert_eq!(msg, decoded);
|
||||
assert_eq!(msg, decoded);
|
||||
}
|
||||
}
|
||||
|
||||
// In coverage, we break after one while loop iteration, changing message values does not
|
||||
|
||||
@@ -299,10 +299,10 @@ fn collect_next_term_split_avx512(
|
||||
let mod_b_mask_hi = simd.splat_u64x8(mod_b_mask_hi);
|
||||
|
||||
let shift_minus_64 = simd.splat_u64x8(shift.wrapping_sub(64));
|
||||
let _64_minus_shift = simd.splat_u64x8(64u64.wrapping_sub(shift));
|
||||
let shift_complement = simd.splat_u64x8(64u64.wrapping_sub(shift));
|
||||
let shift = simd.splat_u64x8(shift);
|
||||
let base_log_minus_64 = simd.splat_u64x8(base_log.wrapping_sub(64));
|
||||
let _64_minus_base_log = simd.splat_u64x8(64u64.wrapping_sub(base_log));
|
||||
let base_log_complement = simd.splat_u64x8(64u64.wrapping_sub(base_log));
|
||||
let base_log = simd.splat_u64x8(base_log);
|
||||
|
||||
for (out_lo, out_hi, state_lo, state_hi) in izip!(
|
||||
@@ -320,7 +320,7 @@ fn collect_next_term_split_avx512(
|
||||
vstate_lo = simd.or_u64x8(
|
||||
simd.shr_dyn_u64x8(vstate_hi, base_log_minus_64),
|
||||
simd.or_u64x8(
|
||||
simd.shl_dyn_u64x8(vstate_hi, _64_minus_base_log),
|
||||
simd.shl_dyn_u64x8(vstate_hi, base_log_complement),
|
||||
simd.shr_dyn_u64x8(vstate_lo, base_log),
|
||||
),
|
||||
);
|
||||
@@ -338,7 +338,7 @@ fn collect_next_term_split_avx512(
|
||||
simd.shr_dyn_u64x8(carry_hi, shift_minus_64),
|
||||
simd.or_u64x8(
|
||||
simd.shr_dyn_u64x8(carry_lo, shift),
|
||||
simd.shr_dyn_u64x8(carry_hi, _64_minus_shift),
|
||||
simd.shr_dyn_u64x8(carry_hi, shift_complement),
|
||||
),
|
||||
);
|
||||
carry_hi = simd.shr_dyn_u64x8(carry_hi, shift);
|
||||
@@ -351,7 +351,7 @@ fn collect_next_term_split_avx512(
|
||||
carry_hi = simd.or_u64x8(
|
||||
simd.or_u64x8(
|
||||
simd.shl_dyn_u64x8(carry_hi, base_log),
|
||||
simd.shr_dyn_u64x8(carry_lo, _64_minus_base_log),
|
||||
simd.shr_dyn_u64x8(carry_lo, base_log_complement),
|
||||
),
|
||||
simd.shl_dyn_u64x8(carry_lo, base_log_minus_64),
|
||||
);
|
||||
@@ -433,10 +433,10 @@ fn collect_next_term_split_avx2(
|
||||
let mod_b_mask_hi = simd.splat_u64x4(mod_b_mask_hi);
|
||||
|
||||
let shift_minus_64 = simd.splat_u64x4(shift.wrapping_sub(64));
|
||||
let _64_minus_shift = simd.splat_u64x4(64u64.wrapping_sub(shift));
|
||||
let shift_complement = simd.splat_u64x4(64u64.wrapping_sub(shift));
|
||||
let shift = simd.splat_u64x4(shift);
|
||||
let base_log_minus_64 = simd.splat_u64x4(base_log.wrapping_sub(64));
|
||||
let _64_minus_base_log = simd.splat_u64x4(64u64.wrapping_sub(base_log));
|
||||
let base_log_complement = simd.splat_u64x4(64u64.wrapping_sub(base_log));
|
||||
let base_log = simd.splat_u64x4(base_log);
|
||||
|
||||
for (out_lo, out_hi, state_lo, state_hi) in izip!(
|
||||
@@ -454,7 +454,7 @@ fn collect_next_term_split_avx2(
|
||||
vstate_lo = simd.or_u64x4(
|
||||
simd.shr_dyn_u64x4(vstate_hi, base_log_minus_64),
|
||||
simd.or_u64x4(
|
||||
simd.shl_dyn_u64x4(vstate_hi, _64_minus_base_log),
|
||||
simd.shl_dyn_u64x4(vstate_hi, base_log_complement),
|
||||
simd.shr_dyn_u64x4(vstate_lo, base_log),
|
||||
),
|
||||
);
|
||||
@@ -471,7 +471,7 @@ fn collect_next_term_split_avx2(
|
||||
simd.shr_dyn_u64x4(carry_hi, shift_minus_64),
|
||||
simd.or_u64x4(
|
||||
simd.shr_dyn_u64x4(carry_lo, shift),
|
||||
simd.shr_dyn_u64x4(carry_hi, _64_minus_shift),
|
||||
simd.shr_dyn_u64x4(carry_hi, shift_complement),
|
||||
),
|
||||
);
|
||||
carry_hi = simd.shr_dyn_u64x4(carry_hi, shift);
|
||||
@@ -484,7 +484,7 @@ fn collect_next_term_split_avx2(
|
||||
carry_hi = simd.or_u64x4(
|
||||
simd.or_u64x4(
|
||||
simd.shl_dyn_u64x4(carry_hi, base_log),
|
||||
simd.shr_dyn_u64x4(carry_lo, _64_minus_base_log),
|
||||
simd.shr_dyn_u64x4(carry_lo, base_log_complement),
|
||||
),
|
||||
simd.shl_dyn_u64x4(carry_lo, base_log_minus_64),
|
||||
);
|
||||
|
||||
@@ -4,10 +4,10 @@ use concrete_fft::fft128::f128;
|
||||
use dyn_stack::PodStack;
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
use pulp::x86::{f64x4, u64x4, V3};
|
||||
use pulp::{f64x4, u64x4, x86::V3};
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(feature = "nightly-avx512")]
|
||||
use pulp::x86::{f64x8, u64x8, V4};
|
||||
use pulp::{f64x8, u64x8, x86::V4};
|
||||
|
||||
#[inline(always)]
|
||||
pub fn zeroing_shl(x: u64, shift: u64) -> u64 {
|
||||
|
||||
@@ -670,7 +670,7 @@ pub(crate) fn update_with_fmadd(
|
||||
let lhs = S::c64s_as_simd(ggsw_poly).0;
|
||||
|
||||
for (out, &lhs, &rhs) in izip!(out, lhs, rhs) {
|
||||
*out = simd.c64s_mul_adde(lhs, rhs, *out);
|
||||
*out = simd.c64s_mul_add_e(lhs, rhs, *out);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -736,7 +736,7 @@ pub(crate) fn update_with_fmadd_factor(
|
||||
} else {
|
||||
for (out, &lhs, &rhs) in izip!(out, lhs, rhs) {
|
||||
// NOTE: see above
|
||||
*out = simd.c64s_mul_adde(factor, simd.c64s_mul(lhs, rhs), *out);
|
||||
*out = simd.c64s_mul_add_e(factor, simd.c64s_mul(lhs, rhs), *out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -120,4 +120,5 @@ pub fn cuda_multi_bit_programmable_bootstrap_lwe_ciphertext<Scalar>(
|
||||
stream,
|
||||
);
|
||||
}
|
||||
stream.synchronize();
|
||||
}
|
||||
|
||||
@@ -78,4 +78,5 @@ pub fn cuda_programmable_bootstrap_lwe_ciphertext<Scalar>(
|
||||
stream,
|
||||
);
|
||||
}
|
||||
stream.synchronize();
|
||||
}
|
||||
|
||||
@@ -90,10 +90,10 @@ fn lwe_encrypt_ks_decrypt_custom_mod<Scalar: UnsignedTorus + CastFrom<usize>>(
|
||||
.iter()
|
||||
.map(|&x| <usize as CastInto<Scalar>>::cast_into(x))
|
||||
.collect_vec();
|
||||
let mut d_input_indexes = stream.malloc_async::<Scalar>(num_blocks as u32);
|
||||
let mut d_output_indexes = stream.malloc_async::<Scalar>(num_blocks as u32);
|
||||
stream.copy_to_gpu_async(&mut d_input_indexes, &lwe_indexes);
|
||||
stream.copy_to_gpu_async(&mut d_output_indexes, &lwe_indexes);
|
||||
let mut d_input_indexes = unsafe { stream.malloc_async::<Scalar>(num_blocks as u32) };
|
||||
let mut d_output_indexes = unsafe { stream.malloc_async::<Scalar>(num_blocks as u32) };
|
||||
unsafe { stream.copy_to_gpu_async(&mut d_input_indexes, &lwe_indexes) };
|
||||
unsafe { stream.copy_to_gpu_async(&mut d_output_indexes, &lwe_indexes) };
|
||||
|
||||
cuda_keyswitch_lwe_ciphertext(
|
||||
&d_ksk_big_to_small,
|
||||
|
||||
@@ -145,8 +145,8 @@ fn lwe_encrypt_multi_bit_pbs_decrypt_custom_mod<
|
||||
}
|
||||
|
||||
let mut d_test_vector_indexes =
|
||||
stream.malloc_async::<Scalar>(number_of_messages as u32);
|
||||
stream.copy_to_gpu_async(&mut d_test_vector_indexes, &test_vector_indexes);
|
||||
unsafe { stream.malloc_async::<Scalar>(number_of_messages as u32) };
|
||||
unsafe { stream.copy_to_gpu_async(&mut d_test_vector_indexes, &test_vector_indexes) };
|
||||
|
||||
let num_blocks = d_lwe_ciphertext_in.0.lwe_ciphertext_count.0;
|
||||
let lwe_indexes_usize: Vec<usize> = (0..num_blocks).collect_vec();
|
||||
@@ -154,10 +154,12 @@ fn lwe_encrypt_multi_bit_pbs_decrypt_custom_mod<
|
||||
.iter()
|
||||
.map(|&x| <usize as CastInto<Scalar>>::cast_into(x))
|
||||
.collect_vec();
|
||||
let mut d_output_indexes = stream.malloc_async::<Scalar>(num_blocks as u32);
|
||||
let mut d_input_indexes = stream.malloc_async::<Scalar>(num_blocks as u32);
|
||||
stream.copy_to_gpu_async(&mut d_output_indexes, &lwe_indexes);
|
||||
stream.copy_to_gpu_async(&mut d_input_indexes, &lwe_indexes);
|
||||
let mut d_output_indexes = unsafe { stream.malloc_async::<Scalar>(num_blocks as u32) };
|
||||
let mut d_input_indexes = unsafe { stream.malloc_async::<Scalar>(num_blocks as u32) };
|
||||
unsafe {
|
||||
stream.copy_to_gpu_async(&mut d_output_indexes, &lwe_indexes);
|
||||
stream.copy_to_gpu_async(&mut d_input_indexes, &lwe_indexes);
|
||||
}
|
||||
|
||||
cuda_multi_bit_programmable_bootstrap_lwe_ciphertext(
|
||||
&d_lwe_ciphertext_in,
|
||||
|
||||
@@ -127,8 +127,8 @@ fn lwe_encrypt_pbs_decrypt<
|
||||
}
|
||||
|
||||
let mut d_test_vector_indexes =
|
||||
stream.malloc_async::<Scalar>(number_of_messages as u32);
|
||||
stream.copy_to_gpu_async(&mut d_test_vector_indexes, &test_vector_indexes);
|
||||
unsafe { stream.malloc_async::<Scalar>(number_of_messages as u32) };
|
||||
unsafe { stream.copy_to_gpu_async(&mut d_test_vector_indexes, &test_vector_indexes) };
|
||||
|
||||
let num_blocks = d_lwe_ciphertext_in.0.lwe_ciphertext_count.0;
|
||||
let lwe_indexes_usize: Vec<usize> = (0..num_blocks).collect_vec();
|
||||
@@ -136,10 +136,12 @@ fn lwe_encrypt_pbs_decrypt<
|
||||
.iter()
|
||||
.map(|&x| <usize as CastInto<Scalar>>::cast_into(x))
|
||||
.collect_vec();
|
||||
let mut d_output_indexes = stream.malloc_async::<Scalar>(num_blocks as u32);
|
||||
let mut d_input_indexes = stream.malloc_async::<Scalar>(num_blocks as u32);
|
||||
stream.copy_to_gpu_async(&mut d_output_indexes, &lwe_indexes);
|
||||
stream.copy_to_gpu_async(&mut d_input_indexes, &lwe_indexes);
|
||||
let mut d_output_indexes = unsafe { stream.malloc_async::<Scalar>(num_blocks as u32) };
|
||||
let mut d_input_indexes = unsafe { stream.malloc_async::<Scalar>(num_blocks as u32) };
|
||||
unsafe {
|
||||
stream.copy_to_gpu_async(&mut d_output_indexes, &lwe_indexes);
|
||||
stream.copy_to_gpu_async(&mut d_input_indexes, &lwe_indexes);
|
||||
}
|
||||
|
||||
cuda_programmable_bootstrap_lwe_ciphertext(
|
||||
&d_lwe_ciphertext_in,
|
||||
|
||||
@@ -18,11 +18,13 @@ impl<T: UnsignedInteger> CudaGlweCiphertextList<T> {
|
||||
stream: &CudaStream,
|
||||
) -> Self {
|
||||
// Allocate memory in the device
|
||||
let d_vec = stream.malloc_async(
|
||||
(glwe_ciphertext_size(glwe_dimension.to_glwe_size(), polynomial_size)
|
||||
* glwe_ciphertext_count.0) as u32,
|
||||
);
|
||||
|
||||
let d_vec = unsafe {
|
||||
stream.malloc_async(
|
||||
(glwe_ciphertext_size(glwe_dimension.to_glwe_size(), polynomial_size)
|
||||
* glwe_ciphertext_count.0) as u32,
|
||||
)
|
||||
};
|
||||
stream.synchronize();
|
||||
let cuda_glwe_list = CudaGlweList {
|
||||
d_vec,
|
||||
glwe_ciphertext_count,
|
||||
@@ -43,13 +45,17 @@ impl<T: UnsignedInteger> CudaGlweCiphertextList<T> {
|
||||
let polynomial_size = h_ct.polynomial_size();
|
||||
let ciphertext_modulus = h_ct.ciphertext_modulus();
|
||||
|
||||
let mut d_vec = stream.malloc_async(
|
||||
(glwe_ciphertext_size(glwe_dimension.to_glwe_size(), polynomial_size)
|
||||
* glwe_ciphertext_count.0) as u32,
|
||||
);
|
||||
|
||||
let mut d_vec = unsafe {
|
||||
stream.malloc_async(
|
||||
(glwe_ciphertext_size(glwe_dimension.to_glwe_size(), polynomial_size)
|
||||
* glwe_ciphertext_count.0) as u32,
|
||||
)
|
||||
};
|
||||
// Copy to the GPU
|
||||
stream.copy_to_gpu_async(&mut d_vec, h_ct.as_ref());
|
||||
unsafe {
|
||||
stream.copy_to_gpu_async(&mut d_vec, h_ct.as_ref());
|
||||
stream.synchronize();
|
||||
}
|
||||
|
||||
let cuda_glwe_list = CudaGlweList {
|
||||
d_vec,
|
||||
@@ -70,8 +76,10 @@ impl<T: UnsignedInteger> CudaGlweCiphertextList<T> {
|
||||
* glwe_ciphertext_size(self.0.glwe_dimension.to_glwe_size(), self.0.polynomial_size);
|
||||
let mut container: Vec<T> = vec![T::ZERO; glwe_ct_size];
|
||||
|
||||
stream.copy_to_cpu_async(container.as_mut_slice(), &self.0.d_vec);
|
||||
stream.synchronize();
|
||||
unsafe {
|
||||
stream.copy_to_cpu_async(container.as_mut_slice(), &self.0.d_vec);
|
||||
stream.synchronize();
|
||||
}
|
||||
|
||||
GlweCiphertextList::from_container(
|
||||
container,
|
||||
@@ -90,14 +98,20 @@ impl<T: UnsignedInteger> CudaGlweCiphertextList<T> {
|
||||
let polynomial_size = h_ct.polynomial_size();
|
||||
let ciphertext_modulus = h_ct.ciphertext_modulus();
|
||||
|
||||
let mut d_vec = stream.malloc_async(
|
||||
(glwe_ciphertext_size(glwe_dimension.to_glwe_size(), polynomial_size)
|
||||
* glwe_ciphertext_count.0) as u32,
|
||||
);
|
||||
let mut d_vec = unsafe {
|
||||
stream.malloc_async(
|
||||
(glwe_ciphertext_size(glwe_dimension.to_glwe_size(), polynomial_size)
|
||||
* glwe_ciphertext_count.0) as u32,
|
||||
)
|
||||
};
|
||||
|
||||
// Copy to the GPU
|
||||
let h_input = h_ct.as_view().into_container();
|
||||
stream.copy_to_gpu_async(&mut d_vec, h_input.as_ref());
|
||||
stream.synchronize();
|
||||
unsafe {
|
||||
stream.copy_to_gpu_async(&mut d_vec, h_input.as_ref());
|
||||
}
|
||||
stream.synchronize();
|
||||
|
||||
let cuda_glwe_list = CudaGlweList {
|
||||
d_vec,
|
||||
|
||||
@@ -39,21 +39,25 @@ impl CudaLweBootstrapKey {
|
||||
let glwe_dimension = bsk.glwe_size().to_glwe_dimension();
|
||||
|
||||
// Allocate memory
|
||||
let mut d_vec = stream.malloc_async::<f64>(lwe_bootstrap_key_size(
|
||||
input_lwe_dimension,
|
||||
glwe_dimension.to_glwe_size(),
|
||||
polynomial_size,
|
||||
decomp_level_count,
|
||||
) as u32);
|
||||
let mut d_vec = unsafe {
|
||||
stream.malloc_async::<f64>(lwe_bootstrap_key_size(
|
||||
input_lwe_dimension,
|
||||
glwe_dimension.to_glwe_size(),
|
||||
polynomial_size,
|
||||
decomp_level_count,
|
||||
) as u32)
|
||||
};
|
||||
// Copy to the GPU
|
||||
stream.convert_lwe_bootstrap_key_async(
|
||||
&mut d_vec,
|
||||
bsk.as_ref(),
|
||||
input_lwe_dimension,
|
||||
glwe_dimension,
|
||||
decomp_level_count,
|
||||
polynomial_size,
|
||||
);
|
||||
unsafe {
|
||||
stream.convert_lwe_bootstrap_key_async(
|
||||
&mut d_vec,
|
||||
bsk.as_ref(),
|
||||
input_lwe_dimension,
|
||||
glwe_dimension,
|
||||
decomp_level_count,
|
||||
polynomial_size,
|
||||
);
|
||||
}
|
||||
stream.synchronize();
|
||||
Self {
|
||||
d_vec,
|
||||
|
||||
@@ -18,8 +18,10 @@ impl<T: UnsignedInteger> CudaLweCiphertextList<T> {
|
||||
stream: &CudaStream,
|
||||
) -> Self {
|
||||
// Allocate memory in the device
|
||||
let d_vec =
|
||||
stream.malloc_async((lwe_dimension.to_lwe_size().0 * lwe_ciphertext_count.0) as u32);
|
||||
let d_vec = unsafe {
|
||||
stream.malloc_async((lwe_dimension.to_lwe_size().0 * lwe_ciphertext_count.0) as u32)
|
||||
};
|
||||
stream.synchronize();
|
||||
|
||||
let cuda_lwe_list = CudaLweList {
|
||||
d_vec,
|
||||
@@ -41,10 +43,13 @@ impl<T: UnsignedInteger> CudaLweCiphertextList<T> {
|
||||
|
||||
// Copy to the GPU
|
||||
let h_input = h_ct.as_view().into_container();
|
||||
let mut d_vec =
|
||||
stream.malloc_async((lwe_dimension.to_lwe_size().0 * lwe_ciphertext_count.0) as u32);
|
||||
stream.copy_to_gpu_async(&mut d_vec, h_input.as_ref());
|
||||
stream.synchronize();
|
||||
let mut d_vec = unsafe {
|
||||
stream.malloc_async((lwe_dimension.to_lwe_size().0 * lwe_ciphertext_count.0) as u32)
|
||||
};
|
||||
unsafe {
|
||||
stream.copy_to_gpu_async(&mut d_vec, h_input.as_ref());
|
||||
stream.synchronize();
|
||||
}
|
||||
let cuda_lwe_list = CudaLweList {
|
||||
d_vec,
|
||||
lwe_ciphertext_count,
|
||||
@@ -73,8 +78,10 @@ impl<T: UnsignedInteger> CudaLweCiphertextList<T> {
|
||||
let lwe_ct_size = self.0.lwe_ciphertext_count.0 * self.0.lwe_dimension.to_lwe_size().0;
|
||||
let mut container: Vec<T> = vec![T::ZERO; lwe_ct_size];
|
||||
|
||||
stream.copy_to_cpu_async(container.as_mut_slice(), &self.0.d_vec);
|
||||
stream.synchronize();
|
||||
unsafe {
|
||||
stream.copy_to_cpu_async(container.as_mut_slice(), &self.0.d_vec);
|
||||
stream.synchronize();
|
||||
}
|
||||
|
||||
LweCiphertextList::from_container(
|
||||
container,
|
||||
@@ -92,8 +99,11 @@ impl<T: UnsignedInteger> CudaLweCiphertextList<T> {
|
||||
let ciphertext_modulus = h_ct.ciphertext_modulus();
|
||||
|
||||
// Copy to the GPU
|
||||
let mut d_vec = stream.malloc_async((lwe_dimension.to_lwe_size().0) as u32);
|
||||
stream.copy_to_gpu_async(&mut d_vec, h_ct.as_ref());
|
||||
let mut d_vec = unsafe { stream.malloc_async((lwe_dimension.to_lwe_size().0) as u32) };
|
||||
unsafe {
|
||||
stream.copy_to_gpu_async(&mut d_vec, h_ct.as_ref());
|
||||
}
|
||||
stream.synchronize();
|
||||
|
||||
let cuda_lwe_list = CudaLweList {
|
||||
d_vec,
|
||||
@@ -108,7 +118,9 @@ impl<T: UnsignedInteger> CudaLweCiphertextList<T> {
|
||||
let lwe_ct_size = self.0.lwe_dimension.to_lwe_size().0;
|
||||
let mut container: Vec<T> = vec![T::ZERO; lwe_ct_size];
|
||||
|
||||
stream.copy_to_cpu_async(container.as_mut_slice(), &self.0.d_vec);
|
||||
unsafe {
|
||||
stream.copy_to_cpu_async(container.as_mut_slice(), &self.0.d_vec);
|
||||
}
|
||||
stream.synchronize();
|
||||
|
||||
LweCiphertext::from_container(container, self.ciphertext_modulus())
|
||||
@@ -148,8 +160,11 @@ impl<T: UnsignedInteger> CudaLweCiphertextList<T> {
|
||||
let ciphertext_modulus = self.ciphertext_modulus();
|
||||
|
||||
// Copy to the GPU
|
||||
let mut d_vec = stream.malloc_async(self.0.d_vec.len() as u32);
|
||||
stream.copy_gpu_to_gpu_async(&mut d_vec, &self.0.d_vec);
|
||||
let mut d_vec = unsafe { stream.malloc_async(self.0.d_vec.len() as u32) };
|
||||
unsafe {
|
||||
stream.copy_gpu_to_gpu_async(&mut d_vec, &self.0.d_vec);
|
||||
}
|
||||
stream.synchronize();
|
||||
|
||||
let cuda_lwe_list = CudaLweList {
|
||||
d_vec,
|
||||
|
||||
@@ -28,15 +28,19 @@ impl<T: UnsignedInteger> CudaLweKeyswitchKey<T> {
|
||||
let ciphertext_modulus = h_ksk.ciphertext_modulus();
|
||||
|
||||
// Allocate memory
|
||||
let mut d_vec = stream.malloc_async::<T>(
|
||||
(input_lwe_size.to_lwe_dimension().0
|
||||
* lwe_keyswitch_key_input_key_element_encrypted_size(
|
||||
decomp_level_count,
|
||||
output_lwe_size,
|
||||
)) as u32,
|
||||
);
|
||||
let mut d_vec = unsafe {
|
||||
stream.malloc_async::<T>(
|
||||
(input_lwe_size.to_lwe_dimension().0
|
||||
* lwe_keyswitch_key_input_key_element_encrypted_size(
|
||||
decomp_level_count,
|
||||
output_lwe_size,
|
||||
)) as u32,
|
||||
)
|
||||
};
|
||||
|
||||
stream.convert_lwe_keyswitch_key_async(&mut d_vec, h_ksk.as_ref());
|
||||
unsafe {
|
||||
stream.convert_lwe_keyswitch_key_async(&mut d_vec, h_ksk.as_ref());
|
||||
}
|
||||
|
||||
stream.synchronize();
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user