Compare commits

..

1 Commit

Author SHA1 Message Date
Andrei Stoian
64229ca391 fix(gpu): refactor crypto params in backend 2026-04-27 13:09:50 +02:00
96 changed files with 3104 additions and 4942 deletions

View File

@@ -54,7 +54,7 @@ jobs:
- name: Retrieve data from cache
id: retrieve-data-cache
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: |
utils/tfhe-backward-compat-data/**/*.cbor
@@ -89,7 +89,7 @@ jobs:
- name: Store data in cache
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
continue-on-error: true
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: |
utils/tfhe-backward-compat-data/**/*.cbor

View File

@@ -69,7 +69,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
dependencies:
@@ -200,7 +200,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: |
~/.nvm
@@ -213,7 +213,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -56,7 +56,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
integer:

View File

@@ -57,7 +57,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
integer:

View File

@@ -78,7 +78,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
dependencies:

View File

@@ -45,7 +45,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
wasm:
@@ -92,7 +92,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: |
~/.nvm
@@ -105,7 +105,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -34,7 +34,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
backward:

View File

@@ -204,7 +204,7 @@ jobs:
uses: foundry-rs/foundry-toolchain@8789b3e21e6c11b2697f5eb56eddae542f746c10
- name: Cache cargo
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
with:
path: |
~/.cargo/registry

View File

@@ -46,7 +46,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
wasm_bench:

View File

@@ -124,7 +124,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: |
~/.nvm
@@ -137,7 +137,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -138,7 +138,7 @@ jobs:
- name: Node cache restoration
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2'
id: node-cache
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: |
~/.nvm
@@ -151,7 +151,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2' && steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -40,7 +40,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
fft:

View File

@@ -42,7 +42,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
ntt:

View File

@@ -44,7 +44,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
tfhe:

View File

@@ -46,7 +46,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
csprng:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -45,7 +45,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:

View File

@@ -41,7 +41,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
hpu:

View File

@@ -53,7 +53,7 @@ jobs:
- name: Restore Sagemath image from cache
id: docker-cache
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: /tmp/sagemath_image
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}
@@ -76,7 +76,7 @@ jobs:
- name: Store Sagemath image in cache
if: steps.docker-cache.outputs.cache-hit != 'true'
continue-on-error: true
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
with:
path: /tmp/sagemath_image
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-cuda-backend"
version = "0.14.0"
version = "0.15.0"
edition = "2021"
authors = ["Zama team"]
license = "BSD-3-Clause-Clear"

View File

@@ -4,22 +4,18 @@
extern "C" {
uint64_t scratch_cuda_integer_aes_ctr_encrypt_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_aes_inputs,
uint32_t sbox_parallelism);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
uint32_t num_aes_inputs, uint32_t sbox_parallelism);
uint64_t scratch_cuda_integer_aes_ctr_256_encrypt_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_aes_inputs,
uint32_t sbox_parallelism);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
uint32_t num_aes_inputs, uint32_t sbox_parallelism);
void cuda_integer_aes_ctr_encrypt_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *output,
@@ -34,12 +30,10 @@ void cleanup_cuda_integer_aes_ctr_256_encrypt_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_key_expansion_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_key_expansion_64_async(CudaStreamsFFI streams,
CudaRadixCiphertextFFI *expanded_keys,
@@ -57,12 +51,10 @@ void cuda_integer_aes_ctr_256_encrypt_64_async(
int8_t *mem_ptr, void *const *bsks, void *const *ksks);
uint64_t scratch_cuda_integer_key_expansion_256_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_key_expansion_256_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *expanded_keys,

View File

@@ -17,10 +17,9 @@ uint64_t scratch_cuda_integer_decompress_radix_ciphertext_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr,
uint32_t encryption_glwe_dimension, uint32_t encryption_polynomial_size,
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
uint32_t lwe_dimension, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks_to_decompress,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t num_blocks_to_decompress,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_compress_radix_ciphertext_64_async(
CudaStreamsFFI streams, CudaPackedGlweCiphertextListFFI *glwe_array_out,

View File

@@ -105,22 +105,32 @@ typedef struct {
uint32_t polynomial_size;
} CudaPackedGlweCiphertextListFFI;
// FFI-boundary parameter struct for a LWE bootstrap key.
// All fields are plain uint32_t for safe Rust/C++ interop.
// Use crypto_params() (defined below) to obtain the strongly-typed C++ form.
//
// The scratch_* entry points declared alongside this struct take it by value
// (as `bsk_params`) in place of a run of separate bootstrap-related
// arguments; the keyswitch parameters (ks_level, ks_base_log) are still
// passed as individual arguments.
typedef struct {
// NOTE(review): presumably the dimension of the LWE ciphertext entering the
// bootstrap (the former lwe_dimension / small_lwe_dimension argument) — confirm.
uint32_t input_lwe_dimension;
uint32_t glwe_dimension;
uint32_t polynomial_size;
// NOTE(review): base_log / level_count presumably carry what the scratch
// functions used to receive as pbs_base_log / pbs_level — confirm.
uint32_t base_log;
uint32_t level_count;
uint32_t big_lwe_dimension;
// Kept as uint32_t rather than an enum type, like every other field here.
// NOTE(review): presumably holds a PBS_TYPE value — confirm against
// crypto_params().
uint32_t pbs_type;
uint32_t grouping_factor;
} CudaLweBootstrapKeyParamsFFI;
uint64_t scratch_cuda_apply_univariate_lut_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, void const *input_lut,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t input_lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, uint64_t lut_degree,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t input_lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus, uint64_t lut_degree,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
uint64_t scratch_cuda_apply_many_univariate_lut_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, void const *input_lut,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t num_many_lut, uint64_t lut_degree,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_apply_univariate_lut_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
@@ -139,12 +149,10 @@ void cuda_apply_many_univariate_lut_64_async(
uint32_t lut_stride);
uint64_t scratch_cuda_full_propagation_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_full_propagation_64_inplace_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *input_blocks,
@@ -162,11 +170,9 @@ void cuda_integer_mult_inplace_64_async(
uint64_t scratch_cuda_integer_mult_inplace_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, bool const is_boolean_left,
bool const is_boolean_right, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t glwe_dimension, uint32_t lwe_dimension,
uint32_t polynomial_size, uint32_t pbs_base_log, uint32_t pbs_level,
uint32_t ks_base_log, uint32_t ks_level, uint32_t grouping_factor,
uint32_t num_blocks, PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
uint32_t carry_modulus, CudaLweBootstrapKeyParamsFFI bsk_params,
uint32_t ks_base_log, uint32_t ks_level, uint32_t num_blocks,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cleanup_cuda_integer_mult_inplace_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
@@ -183,12 +189,10 @@ void cuda_scalar_addition_ciphertext_64_inplace(
uint32_t message_modulus, uint32_t carry_modulus);
uint64_t scratch_cuda_logical_scalar_shift_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_logical_scalar_shift_64_inplace_async(
@@ -196,12 +200,10 @@ void cuda_logical_scalar_shift_64_inplace_async(
int8_t *mem_ptr, void *const *bsks, void *const *ksks);
uint64_t scratch_cuda_arithmetic_scalar_shift_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_arithmetic_scalar_shift_64_inplace_async(
@@ -215,12 +217,10 @@ void cleanup_cuda_arithmetic_scalar_shift_64_inplace(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_shift_and_rotate_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type, bool is_signed,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type, bool is_signed,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_shift_and_rotate_64_inplace_async(
@@ -232,22 +232,18 @@ void cleanup_cuda_shift_and_rotate_64_inplace(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_comparison_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, COMPARISON_TYPE op_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus, COMPARISON_TYPE op_type,
bool is_signed, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
uint64_t scratch_cuda_integer_scalar_comparison_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, COMPARISON_TYPE op_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus, COMPARISON_TYPE op_type,
bool is_signed, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
@@ -276,12 +272,10 @@ void cuda_boolean_bitop_inplace_64_async(
void *const *bsks, void *const *ksks);
uint64_t scratch_cuda_boolean_bitop_inplace_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus, BITOP_TYPE op_type,
bool is_unchecked, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
@@ -289,11 +283,9 @@ void cleanup_cuda_boolean_bitop_inplace_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_boolean_bitnot_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
uint32_t lwe_ciphertext_count, bool is_unchecked, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
@@ -311,6 +303,20 @@ void cuda_bitnot_ciphertext_64(CudaStreamsFFI streams,
uint32_t param_message_modulus,
uint32_t param_carry_modulus);
uint64_t scratch_cuda_integer_bitop_inplace_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus, BITOP_TYPE op_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
uint64_t scratch_cuda_integer_scalar_bitop_inplace_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus, BITOP_TYPE op_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_bitop_inplace_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_inout,
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
@@ -322,38 +328,20 @@ void cuda_integer_scalar_bitop_inplace_64_async(
uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
void *const *ksks);
uint64_t scratch_cuda_integer_bitop_inplace_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cleanup_cuda_integer_bitop_inplace_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_scalar_bitop_inplace_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cleanup_cuda_integer_scalar_bitop_inplace_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_cmux_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
uint64_t scratch_cuda_cmux_64_async(CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params,
uint32_t ks_level, uint32_t ks_base_log,
uint32_t lwe_ciphertext_count,
uint32_t message_modulus,
uint32_t carry_modulus,
bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_cmux_64_async(CudaStreamsFFI streams,
CudaRadixCiphertextFFI *lwe_array_out,
@@ -365,12 +353,10 @@ void cuda_cmux_64_async(CudaStreamsFFI streams,
void cleanup_cuda_cmux_64(CudaStreamsFFI streams, int8_t **mem_ptr_void);
uint64_t scratch_cuda_scalar_rotate_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_scalar_rotate_64_inplace_async(CudaStreamsFFI streams,
@@ -382,21 +368,17 @@ void cleanup_cuda_scalar_rotate_64_inplace(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_propagate_single_carry_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t requested_flag, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
uint64_t scratch_cuda_add_and_propagate_single_carry_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t requested_flag, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_propagate_single_carry_64_inplace_async(
@@ -418,12 +400,10 @@ void cleanup_cuda_add_and_propagate_single_carry_64_inplace(
CudaStreamsFFI streams, int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_overflowing_sub_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, uint32_t compute_overflow, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t compute_overflow, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_overflowing_sub_64_inplace_async(
@@ -438,14 +418,12 @@ void cleanup_cuda_integer_overflowing_sub_64_inplace(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_partial_sum_ciphertexts_vec_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks_in_radix,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks_in_radix,
uint32_t max_num_radix_in_vec, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type,
bool reduce_degrees_for_single_carry_propagation, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
uint32_t carry_modulus, bool reduce_degrees_for_single_carry_propagation,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_partial_sum_ciphertexts_vec_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_out,
@@ -456,12 +434,11 @@ void cleanup_cuda_partial_sum_ciphertexts_vec_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_scalar_mul_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t num_scalar_bits,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t num_scalar_bits, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_scalar_mul_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array,
@@ -474,11 +451,9 @@ void cleanup_cuda_integer_scalar_mul_64(CudaStreamsFFI streams,
uint64_t scratch_cuda_integer_div_rem_64_async(
CudaStreamsFFI streams, bool is_signed, int8_t **mem_ptr,
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_div_rem_64_async(CudaStreamsFFI streams,
@@ -497,11 +472,9 @@ void cuda_integer_reverse_blocks_64_inplace_async(
uint64_t scratch_cuda_integer_abs_inplace_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, bool is_signed,
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_abs_inplace_64_async(CudaStreamsFFI streams,
@@ -513,12 +486,10 @@ void cleanup_cuda_integer_abs_inplace_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_are_all_comparisons_block_true_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_are_all_comparisons_block_true_64_async(
@@ -530,12 +501,10 @@ void cleanup_cuda_integer_are_all_comparisons_block_true_64(
CudaStreamsFFI streams, int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_is_at_least_one_comparisons_block_true_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_is_at_least_one_comparisons_block_true_64_async(
@@ -559,13 +528,11 @@ void trim_radix_blocks_msb_64(CudaRadixCiphertextFFI *output,
CudaStreamsFFI streams);
uint64_t scratch_cuda_apply_noise_squashing_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t input_glwe_dimension, uint32_t input_polynomial_size,
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t num_original_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t input_glwe_dimension,
uint32_t input_polynomial_size, uint32_t ks_level, uint32_t ks_base_log,
uint32_t num_radix_blocks, uint32_t num_original_blocks,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_apply_noise_squashing_async(
@@ -577,12 +544,10 @@ void cleanup_cuda_apply_noise_squashing(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_sub_and_propagate_single_carry_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t requested_flag, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_sub_and_propagate_single_carry_64_inplace_async(
@@ -595,13 +560,11 @@ void cleanup_cuda_sub_and_propagate_single_carry_64_inplace(
CudaStreamsFFI streams, int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_unsigned_scalar_div_radix_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type,
const CudaScalarDivisorFFI *scalar_divisor_ffi, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_unsigned_scalar_div_radix_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *numerator_ct,
@@ -612,13 +575,11 @@ void cleanup_cuda_integer_unsigned_scalar_div_radix_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_signed_scalar_div_radix_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type,
const CudaScalarDivisorFFI *scalar_divisor_ffi, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_signed_scalar_div_radix_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *numerator_ct,
@@ -629,12 +590,10 @@ void cleanup_cuda_integer_signed_scalar_div_radix_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_unsigned_scalar_div_rem_radix_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type,
const CudaScalarDivisorFFI *scalar_divisor_ffi,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
uint32_t const active_bits_divisor, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
@@ -651,12 +610,10 @@ void cleanup_cuda_integer_unsigned_scalar_div_rem_radix_64(
CudaStreamsFFI streams, int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_signed_scalar_div_rem_radix_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type,
const CudaScalarDivisorFFI *scalar_divisor_ffi,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
uint32_t const active_bits_divisor, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
@@ -672,12 +629,11 @@ void cleanup_cuda_integer_signed_scalar_div_rem_radix_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_count_of_consecutive_bits_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t counter_num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
Direction direction, BitValue bit_value, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t counter_num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, Direction direction,
BitValue bit_value, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_count_of_consecutive_bits_64_async(
@@ -689,13 +645,12 @@ void cleanup_cuda_integer_count_of_consecutive_bits_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_grouped_oprf_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks_to_process,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, uint32_t message_bits_per_block,
uint32_t total_random_bits, PBS_MS_REDUCTION_T noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks_to_process,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
uint32_t message_bits_per_block, uint32_t total_random_bits,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_grouped_oprf_64_async(CudaStreamsFFI streams,
CudaRadixCiphertextFFI *radix_lwe_out,
@@ -707,14 +662,12 @@ void cleanup_cuda_integer_grouped_oprf_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_grouped_oprf_custom_range_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks_intermediate,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, uint32_t message_bits_per_block,
uint32_t num_input_random_bits, uint32_t num_scalar_bits,
PBS_MS_REDUCTION_T noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks_intermediate,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
uint32_t message_bits_per_block, uint32_t num_input_random_bits,
uint32_t num_scalar_bits, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_grouped_oprf_custom_range_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_out,
@@ -727,11 +680,10 @@ void cleanup_cuda_integer_grouped_oprf_custom_range_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_ilog2_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, uint32_t input_num_blocks, uint32_t counter_num_blocks,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
uint32_t input_num_blocks, uint32_t counter_num_blocks,
uint32_t num_bits_in_ciphertext, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
@@ -747,14 +699,12 @@ void cleanup_cuda_integer_ilog2_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_match_value_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_matches, uint32_t num_input_blocks,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_matches, uint32_t num_input_blocks,
uint32_t num_output_packed_blocks, uint32_t max_output_is_zero,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_match_value_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out_result,
@@ -767,13 +717,11 @@ void cleanup_cuda_unchecked_match_value_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_cast_to_unsigned_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_input_blocks, uint32_t target_num_blocks, bool input_is_signed,
bool requires_full_propagate, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_input_blocks, uint32_t target_num_blocks,
bool input_is_signed, bool requires_full_propagate,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_cast_to_unsigned_64_async(CudaStreamsFFI streams,
@@ -787,14 +735,12 @@ void cleanup_cuda_cast_to_unsigned_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_match_value_or_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_matches, uint32_t num_input_blocks,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_matches, uint32_t num_input_blocks,
uint32_t num_match_packed_blocks, uint32_t num_final_blocks,
uint32_t max_output_is_zero, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_match_value_or_64_async(
@@ -808,12 +754,10 @@ void cleanup_cuda_unchecked_match_value_or_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_contains_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_contains_64_async(CudaStreamsFFI streams,
@@ -828,12 +772,10 @@ void cleanup_cuda_unchecked_contains_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_contains_clear_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_contains_clear_64_async(
@@ -846,12 +788,10 @@ void cleanup_cuda_unchecked_contains_clear_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_is_in_clears_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_clears, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_clears, uint32_t num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_is_in_clears_64_async(CudaStreamsFFI streams,
@@ -866,12 +806,10 @@ void cleanup_cuda_unchecked_is_in_clears_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_index_in_clears_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_clears, uint32_t num_blocks, uint32_t num_blocks_index,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_clears, uint32_t num_blocks,
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_index_in_clears_64_async(
@@ -885,12 +823,10 @@ void cleanup_cuda_unchecked_index_in_clears_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_first_index_in_clears_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_unique, uint32_t num_blocks, uint32_t num_blocks_index,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_unique, uint32_t num_blocks,
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_first_index_in_clears_64_async(
@@ -908,12 +844,10 @@ void cleanup_cuda_unchecked_first_index_in_clears_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_first_index_of_clear_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_first_index_of_clear_64_async(
@@ -927,12 +861,10 @@ void cleanup_cuda_unchecked_first_index_of_clear_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_first_index_of_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_first_index_of_64_async(
@@ -946,12 +878,10 @@ void cleanup_cuda_unchecked_first_index_of_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_index_of_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_index_of_64_async(CudaStreamsFFI streams,
@@ -967,12 +897,10 @@ void cleanup_cuda_unchecked_index_of_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_index_of_clear_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_index_of_clear_64_async(
@@ -987,12 +915,10 @@ void cleanup_cuda_unchecked_index_of_clear_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_all_eq_slices_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_all_eq_slices_64_async(
@@ -1005,12 +931,10 @@ void cleanup_cuda_unchecked_all_eq_slices_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_unchecked_contains_sub_slice_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_lhs, uint32_t num_rhs, uint32_t num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_lhs, uint32_t num_rhs,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_unchecked_contains_sub_slice_64_async(
@@ -1023,12 +947,10 @@ void cleanup_cuda_unchecked_contains_sub_slice_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_cast_to_signed_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_input_blocks,
uint32_t target_num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool input_is_signed,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_input_blocks, uint32_t target_num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, bool input_is_signed,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_cast_to_signed_64_async(CudaStreamsFFI streams,

View File

@@ -345,6 +345,21 @@ struct int_radix_params {
message_modulus(message_modulus), carry_modulus(carry_modulus),
noise_reduction_type(noise_reduction_type){};
// Builds the radix parameter set from a bundled bootstrap-key parameter
// struct instead of individually passed PBS/GLWE values.
//
// Taken from `bsk_params`:
//   pbs_type            <- bsk_params.pbs_type (cast to PBS_TYPE)
//   glwe_dimension      <- bsk_params.glwe_dimension
//   polynomial_size     <- bsk_params.polynomial_size
//   big_lwe_dimension   <- bsk_params.big_lwe_dimension
//   small_lwe_dimension <- bsk_params.input_lwe_dimension
//   pbs_level           <- bsk_params.level_count
//   pbs_base_log        <- bsk_params.base_log
//   grouping_factor     <- bsk_params.grouping_factor
//
// Copied directly from the remaining arguments: ks_level, ks_base_log,
// message_modulus, carry_modulus and noise_reduction_type.
//
// NOTE(review): the cast suggests bsk_params.pbs_type is carried as a plain
// integer/FFI enum rather than PBS_TYPE itself; no range check is done here,
// so confirm against the CudaLweBootstrapKeyParamsFFI definition.
int_radix_params(CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus,
uint32_t carry_modulus,
PBS_MS_REDUCTION_T noise_reduction_type)
: pbs_type((PBS_TYPE)bsk_params.pbs_type),
glwe_dimension(bsk_params.glwe_dimension),
polynomial_size(bsk_params.polynomial_size),
big_lwe_dimension(bsk_params.big_lwe_dimension),
small_lwe_dimension(bsk_params.input_lwe_dimension), ks_level(ks_level),
ks_base_log(ks_base_log), pbs_level(bsk_params.level_count),
pbs_base_log(bsk_params.base_log),
grouping_factor(bsk_params.grouping_factor),
message_modulus(message_modulus), carry_modulus(carry_modulus),
noise_reduction_type(noise_reduction_type){};
int_radix_params() = default;
void print() {

View File

@@ -5,12 +5,11 @@
extern "C" {
uint64_t scratch_cuda_kreyvium_generate_keystream_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_inputs);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
uint32_t num_inputs);
void cuda_kreyvium_generate_keystream_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *keystream_output,

View File

@@ -5,12 +5,11 @@
extern "C" {
uint64_t scratch_cuda_trivium_generate_keystream_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_inputs);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
uint32_t num_inputs);
void cuda_trivium_generate_keystream_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *keystream_output,

View File

@@ -2,19 +2,14 @@
#include "aes.cuh"
uint64_t scratch_cuda_integer_aes_ctr_encrypt_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_aes_inputs,
uint32_t sbox_parallelism) {
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
uint32_t num_aes_inputs, uint32_t sbox_parallelism) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_integer_aes_encrypt<uint64_t>(
CudaStreams(streams), (int_aes_encrypt_buffer<uint64_t> **)mem_ptr,
@@ -22,19 +17,14 @@ uint64_t scratch_cuda_integer_aes_ctr_encrypt_64_async(
}
uint64_t scratch_cuda_integer_aes_ctr_256_encrypt_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_aes_inputs,
uint32_t sbox_parallelism) {
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
uint32_t num_aes_inputs, uint32_t sbox_parallelism) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_integer_aes_encrypt<uint64_t>(
CudaStreams(streams), (int_aes_encrypt_buffer<uint64_t> **)mem_ptr,
@@ -78,18 +68,13 @@ void cleanup_cuda_integer_aes_ctr_256_encrypt_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_integer_key_expansion_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_integer_key_expansion<uint64_t>(
CudaStreams(streams), (int_key_expansion_buffer<uint64_t> **)mem_ptr,

View File

@@ -14,18 +14,13 @@ void cuda_integer_aes_ctr_256_encrypt_64_async(
}
uint64_t scratch_cuda_integer_key_expansion_256_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_integer_key_expansion_256<uint64_t>(
CudaStreams(streams), (int_key_expansion_256_buffer<uint64_t> **)mem_ptr,

View File

@@ -2,17 +2,12 @@
uint64_t scratch_cuda_integer_abs_inplace_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, bool is_signed,
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_integer_abs<uint64_t>(
CudaStreams(streams), (int_abs_buffer<uint64_t> **)mem_ptr, is_signed,

View File

@@ -11,19 +11,14 @@ void cuda_boolean_bitop_inplace_64_async(
}
uint64_t scratch_cuda_boolean_bitop_inplace_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus, BITOP_TYPE op_type,
bool is_unchecked, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_boolean_bitop<uint64_t>(
CudaStreams(streams), (boolean_bitop_buffer<uint64_t> **)mem_ptr,
@@ -41,18 +36,13 @@ void cleanup_cuda_boolean_bitop_inplace_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_boolean_bitnot_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
uint32_t lwe_ciphertext_count, bool is_unchecked, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_boolean_bitnot<uint64_t>(
CudaStreams(streams), (boolean_bitnot_buffer<uint64_t> **)mem_ptr, params,
@@ -78,6 +68,34 @@ void cleanup_cuda_boolean_bitnot_64(CudaStreamsFFI streams,
*mem_ptr_void = nullptr;
}
// Scratch entry point for the in-place radix bitwise operation (64-bit).
//
// Bundles the bootstrap-key parameters, keyswitch level/base log, moduli and
// noise-reduction type into an int_radix_params, then forwards to
// scratch_cuda_bitop<uint64_t>. The value returned is whatever that call
// returns.
uint64_t scratch_cuda_integer_bitop_inplace_64_async(
    CudaStreamsFFI streams, int8_t **mem_ptr,
    CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
    uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, BITOP_TYPE op_type,
    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
  // mem_ptr arrives as int8_t**; reinterpret it as the buffer-pointer type
  // that scratch_cuda_bitop takes.
  auto typed_mem_ptr = (int_bitop_buffer<uint64_t> **)mem_ptr;

  int_radix_params radix_params(bsk_params, ks_level, ks_base_log,
                                message_modulus, carry_modulus,
                                noise_reduction_type);

  return scratch_cuda_bitop<uint64_t>(CudaStreams(streams), typed_mem_ptr,
                                      lwe_ciphertext_count, radix_params,
                                      op_type, allocate_gpu_memory);
}
// Scratch entry point for the in-place scalar bitwise operation (64-bit).
//
// Assembles an int_radix_params from the bootstrap-key bundle plus the
// explicit keyswitch, modulus and noise-reduction arguments, and hands it to
// scratch_cuda_bitop<uint64_t> together with the block count and op_type.
// Returns the result of that call unchanged.
uint64_t scratch_cuda_integer_scalar_bitop_inplace_64_async(
    CudaStreamsFFI streams, int8_t **mem_ptr,
    CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
    uint32_t ks_base_log, uint32_t lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, BITOP_TYPE op_type,
    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
  int_radix_params radix_params(bsk_params, ks_level, ks_base_log,
                                message_modulus, carry_modulus,
                                noise_reduction_type);

  // The int8_t** handle is viewed as a pointer to the typed bitop buffer
  // pointer expected by the templated implementation.
  auto bitop_mem = (int_bitop_buffer<uint64_t> **)mem_ptr;

  return scratch_cuda_bitop<uint64_t>(CudaStreams(streams), bitop_mem,
                                      lwe_ciphertext_count, radix_params,
                                      op_type, allocate_gpu_memory);
}
void cuda_bitnot_ciphertext_64(CudaStreamsFFI streams,
CudaRadixCiphertextFFI *radix_ciphertext,
uint32_t ct_message_modulus,
@@ -99,25 +117,6 @@ void cuda_integer_bitop_inplace_64_async(
(uint64_t **)(ksks));
}
// Scratch entry point for the in-place radix bitwise operation (64-bit),
// in the form that receives every cryptographic parameter as its own
// argument.
//
// The arguments are gathered into an int_radix_params and forwarded to
// scratch_cuda_bitop<uint64_t>; the return value is whatever that call
// returns.
uint64_t scratch_cuda_integer_bitop_inplace_64_async(
    CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
    uint32_t polynomial_size, uint32_t big_lwe_dimension,
    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
    uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
  // mem_ptr arrives as int8_t**; reinterpret it as the buffer-pointer type
  // that scratch_cuda_bitop takes.
  auto typed_mem_ptr = (int_bitop_buffer<uint64_t> **)mem_ptr;

  int_radix_params radix_params(
      pbs_type, glwe_dimension, polynomial_size, big_lwe_dimension,
      small_lwe_dimension, ks_level, ks_base_log, pbs_level, pbs_base_log,
      grouping_factor, message_modulus, carry_modulus, noise_reduction_type);

  return scratch_cuda_bitop<uint64_t>(CudaStreams(streams), typed_mem_ptr,
                                      lwe_ciphertext_count, radix_params,
                                      op_type, allocate_gpu_memory);
}
void cleanup_cuda_integer_bitop_inplace_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void) {
@@ -128,25 +127,6 @@ void cleanup_cuda_integer_bitop_inplace_64(CudaStreamsFFI streams,
*mem_ptr_void = nullptr;
}
// Scratch entry point for the in-place scalar bitwise operation (64-bit),
// in the form that receives every cryptographic parameter as its own
// argument.
//
// Builds an int_radix_params from those arguments and passes it, with the
// block count and op_type, to scratch_cuda_bitop<uint64_t>. Returns the
// result of that call unchanged.
uint64_t scratch_cuda_integer_scalar_bitop_inplace_64_async(
    CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
    uint32_t polynomial_size, uint32_t big_lwe_dimension,
    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
    uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
  int_radix_params radix_params(
      pbs_type, glwe_dimension, polynomial_size, big_lwe_dimension,
      small_lwe_dimension, ks_level, ks_base_log, pbs_level, pbs_base_log,
      grouping_factor, message_modulus, carry_modulus, noise_reduction_type);

  // The int8_t** handle is viewed as a pointer to the typed bitop buffer
  // pointer expected by the templated implementation.
  auto bitop_mem = (int_bitop_buffer<uint64_t> **)mem_ptr;

  return scratch_cuda_bitop<uint64_t>(CudaStreams(streams), bitop_mem,
                                      lwe_ciphertext_count, radix_params,
                                      op_type, allocate_gpu_memory);
}
void cleanup_cuda_integer_scalar_bitop_inplace_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void) {

View File

@@ -34,19 +34,14 @@ void trim_radix_blocks_msb_64(CudaRadixCiphertextFFI *output,
}
uint64_t scratch_cuda_cast_to_unsigned_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_input_blocks, uint32_t target_num_blocks, bool input_is_signed,
bool requires_full_propagate, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_input_blocks, uint32_t target_num_blocks,
bool input_is_signed, bool requires_full_propagate,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_cast_to_unsigned<uint64_t>(
CudaStreams(streams), (int_cast_to_unsigned_buffer<uint64_t> **)mem_ptr,
@@ -80,19 +75,13 @@ void cleanup_cuda_cast_to_unsigned_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_cast_to_signed_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_input_blocks,
uint32_t target_num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool input_is_signed,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_input_blocks, uint32_t target_num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, bool input_is_signed,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_cast_to_signed<uint64_t>(
CudaStreams(streams), (int_cast_to_signed_buffer<uint64_t> **)mem_ptr,

View File

@@ -1,18 +1,16 @@
#include "integer/cmux.cuh"
uint64_t scratch_cuda_cmux_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
uint64_t scratch_cuda_cmux_64_async(CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params,
uint32_t ks_level, uint32_t ks_base_log,
uint32_t lwe_ciphertext_count,
uint32_t message_modulus,
uint32_t carry_modulus,
bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
PUSH_RANGE("scratch cmux")
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
std::function<uint64_t(uint64_t)> predicate_lut_f =
[](uint64_t x) -> uint64_t { return x == 1; };

View File

@@ -1,18 +1,14 @@
#include "integer/comparison.cuh"
uint64_t scratch_cuda_integer_comparison_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, COMPARISON_TYPE op_type, bool is_signed,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
uint32_t carry_modulus, COMPARISON_TYPE op_type, bool is_signed,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
PUSH_RANGE("scratch comparison")
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
uint64_t size_tracker = 0;
switch (op_type) {
@@ -38,18 +34,14 @@ uint64_t scratch_cuda_integer_comparison_64_async(
}
uint64_t scratch_cuda_integer_scalar_comparison_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, COMPARISON_TYPE op_type, bool is_signed,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
uint32_t carry_modulus, COMPARISON_TYPE op_type, bool is_signed,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
PUSH_RANGE("scratch scalar comparison")
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
uint64_t size_tracker = 0;
switch (op_type) {
@@ -151,18 +143,13 @@ void cleanup_cuda_integer_scalar_comparison_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_integer_are_all_comparisons_block_true_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_comparison_check<uint64_t>(
CudaStreams(streams), (int_comparison_buffer<uint64_t> **)mem_ptr,
@@ -196,18 +183,13 @@ void cleanup_cuda_integer_are_all_comparisons_block_true_64(
}
uint64_t scratch_cuda_integer_is_at_least_one_comparisons_block_true_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_comparison_check<uint64_t>(
CudaStreams(streams), (int_comparison_buffer<uint64_t> **)mem_ptr,

View File

@@ -23,22 +23,24 @@ uint64_t scratch_cuda_integer_decompress_radix_ciphertext_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr,
uint32_t encryption_glwe_dimension, uint32_t encryption_polynomial_size,
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
uint32_t lwe_dimension, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks_to_decompress,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t num_blocks_to_decompress,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
// Decompression doesn't keyswitch, so big and small dimensions are the same
int_radix_params encryption_params(
pbs_type, encryption_glwe_dimension, encryption_polynomial_size,
lwe_dimension, lwe_dimension, 0, 0, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus, noise_reduction_type);
(PBS_TYPE)bsk_params.pbs_type, encryption_glwe_dimension,
encryption_polynomial_size, bsk_params.big_lwe_dimension,
bsk_params.big_lwe_dimension, 0, 0, bsk_params.level_count,
bsk_params.base_log, bsk_params.grouping_factor, message_modulus,
carry_modulus, noise_reduction_type);
int_radix_params compression_params(
pbs_type, compression_glwe_dimension, compression_polynomial_size,
lwe_dimension, compression_glwe_dimension * compression_polynomial_size,
0, 0, pbs_level, pbs_base_log, grouping_factor, message_modulus,
carry_modulus, noise_reduction_type);
(PBS_TYPE)bsk_params.pbs_type, compression_glwe_dimension,
compression_polynomial_size, bsk_params.big_lwe_dimension,
compression_glwe_dimension * compression_polynomial_size, 0, 0,
bsk_params.level_count, bsk_params.base_log, bsk_params.grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
return scratch_cuda_integer_decompress_radix_ciphertext<uint64_t>(
CudaStreams(streams), (int_decompression<uint64_t> **)mem_ptr,

View File

@@ -2,17 +2,13 @@
uint64_t scratch_cuda_integer_div_rem_64_async(
CudaStreamsFFI streams, bool is_signed, int8_t **mem_ptr,
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
PUSH_RANGE("scratch div")
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_integer_div_rem<uint64_t>(
CudaStreams(streams), is_signed, (int_div_rem_memory<uint64_t> **)mem_ptr,

View File

@@ -1,19 +1,14 @@
#include "ilog2.cuh"
uint64_t scratch_cuda_integer_count_of_consecutive_bits_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t counter_num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
Direction direction, BitValue bit_value, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t counter_num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, Direction direction,
BitValue bit_value, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_integer_count_of_consecutive_bits<uint64_t>(
CudaStreams(streams), params,
@@ -53,19 +48,14 @@ void cleanup_cuda_integer_count_of_consecutive_bits_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_integer_ilog2_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, uint32_t input_num_blocks, uint32_t counter_num_blocks,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
uint32_t input_num_blocks, uint32_t counter_num_blocks,
uint32_t num_bits_in_ciphertext, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_integer_ilog2<uint64_t>(
CudaStreams(streams), params, (int_ilog2_buffer<uint64_t> **)mem_ptr,

View File

@@ -15,17 +15,12 @@ void cuda_full_propagation_64_inplace_async(
}
uint64_t scratch_cuda_full_propagation_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_full_propagation<uint64_t>(
CudaStreams(streams), (int_fullprop_buffer<uint64_t> **)mem_ptr, params,
@@ -44,17 +39,13 @@ void cleanup_cuda_full_propagation_64_inplace(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_propagate_single_carry_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t requested_flag, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_propagate_single_carry_inplace<uint64_t>(
CudaStreams(streams), (int_sc_prop_memory<uint64_t> **)mem_ptr,
@@ -62,17 +53,13 @@ uint64_t scratch_cuda_propagate_single_carry_64_inplace_async(
}
uint64_t scratch_cuda_add_and_propagate_single_carry_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t requested_flag, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_propagate_single_carry_inplace<uint64_t>(
CudaStreams(streams), (int_sc_prop_memory<uint64_t> **)mem_ptr,
@@ -80,17 +67,13 @@ uint64_t scratch_cuda_add_and_propagate_single_carry_64_inplace_async(
}
uint64_t scratch_cuda_integer_overflowing_sub_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, uint32_t compute_overflow, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t compute_overflow, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_integer_overflowing_sub<uint64_t>(
CudaStreams(streams), (int_borrow_prop_memory<uint64_t> **)mem_ptr,
@@ -170,17 +153,12 @@ void cleanup_cuda_integer_overflowing_sub_64_inplace(CudaStreamsFFI streams,
uint64_t scratch_cuda_apply_univariate_lut_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, void const *input_lut,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint64_t lut_degree, bool allocate_gpu_memory,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint64_t lut_degree, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_apply_univariate_lut<uint64_t>(
CudaStreams(streams), (int_radix_lut<uint64_t> **)mem_ptr,
@@ -190,17 +168,12 @@ uint64_t scratch_cuda_apply_univariate_lut_64_async(
uint64_t scratch_cuda_apply_many_univariate_lut_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, void const *input_lut,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_radix_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t num_many_lut, uint64_t lut_degree,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_apply_many_univariate_lut<uint64_t>(
CudaStreams(streams), (int_radix_lut<uint64_t> **)mem_ptr,
@@ -294,19 +267,14 @@ uint64_t scratch_cuda_apply_noise_squashing_mem(
}
uint64_t scratch_cuda_apply_noise_squashing_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t input_glwe_dimension, uint32_t input_polynomial_size,
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t original_num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t input_glwe_dimension,
uint32_t input_polynomial_size, uint32_t ks_level, uint32_t ks_base_log,
uint32_t num_radix_blocks, uint32_t original_num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_apply_noise_squashing_mem(
streams, params, (int_noise_squashing_lut<uint64_t> **)mem_ptr,

View File

@@ -122,16 +122,12 @@ void cuda_integer_mult_inplace_64_async(
uint64_t scratch_cuda_integer_mult_inplace_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, bool const is_boolean_left,
bool const is_boolean_right, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t glwe_dimension, uint32_t lwe_dimension,
uint32_t polynomial_size, uint32_t pbs_base_log, uint32_t pbs_level,
uint32_t ks_base_log, uint32_t ks_level, uint32_t grouping_factor,
uint32_t num_radix_blocks, PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
polynomial_size * glwe_dimension, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
uint32_t carry_modulus, CudaLweBootstrapKeyParamsFFI bsk_params,
uint32_t ks_base_log, uint32_t ks_level, uint32_t num_radix_blocks,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
const uint32_t polynomial_size = bsk_params.polynomial_size;
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
switch (polynomial_size) {
case 256:
@@ -164,20 +160,14 @@ void cleanup_cuda_integer_mult_inplace_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_partial_sum_ciphertexts_vec_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks_in_radix,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks_in_radix,
uint32_t max_num_radix_in_vec, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type,
bool reduce_degrees_for_single_carry_propagation, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
uint32_t carry_modulus, bool reduce_degrees_for_single_carry_propagation,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_integer_partial_sum_ciphertexts_vec<uint64_t>(
CudaStreams(streams),
(int_sum_ciphertexts_vec_memory<uint64_t> **)mem_ptr, num_blocks_in_radix,

View File

@@ -1,19 +1,14 @@
#include "integer/oprf.cuh"
uint64_t scratch_cuda_integer_grouped_oprf_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks_to_process,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, uint32_t message_bits_per_block,
uint32_t total_random_bits, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks_to_process,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
uint32_t message_bits_per_block, uint32_t total_random_bits,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_integer_grouped_oprf<uint64_t>(
CudaStreams(streams), (int_grouped_oprf_memory<uint64_t> **)mem_ptr,
@@ -45,20 +40,14 @@ void cleanup_cuda_integer_grouped_oprf_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_integer_grouped_oprf_custom_range_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks_intermediate,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, uint32_t message_bits_per_block,
uint32_t num_input_random_bits, uint32_t num_scalar_bits,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks_intermediate,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
uint32_t message_bits_per_block, uint32_t num_input_random_bits,
uint32_t num_scalar_bits, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_integer_grouped_oprf_custom_range<uint64_t>(
CudaStreams(streams),

View File

@@ -1,19 +1,13 @@
#include "scalar_div.cuh"
uint64_t scratch_cuda_integer_unsigned_scalar_div_radix_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type,
const CudaScalarDivisorFFI *scalar_divisor_ffi, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_integer_unsigned_scalar_div_radix<uint64_t>(
CudaStreams(streams), params,
@@ -45,19 +39,13 @@ void cleanup_cuda_integer_unsigned_scalar_div_radix_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_integer_signed_scalar_div_radix_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type,
const CudaScalarDivisorFFI *scalar_divisor_ffi, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_integer_signed_scalar_div_radix<uint64_t>(
CudaStreams(streams), params,
@@ -89,20 +77,14 @@ void cleanup_cuda_integer_signed_scalar_div_radix_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_integer_unsigned_scalar_div_rem_radix_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type,
const CudaScalarDivisorFFI *scalar_divisor_ffi,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
uint32_t const active_bits_divisor, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_integer_unsigned_scalar_div_rem_radix<uint64_t>(
CudaStreams(streams), params,
@@ -143,20 +125,14 @@ void cleanup_cuda_integer_unsigned_scalar_div_rem_radix_64(
}
uint64_t scratch_cuda_integer_signed_scalar_div_rem_radix_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type,
const CudaScalarDivisorFFI *scalar_divisor_ffi,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, const CudaScalarDivisorFFI *scalar_divisor_ffi,
uint32_t const active_bits_divisor, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_integer_signed_scalar_div_rem_radix<uint64_t>(
CudaStreams(streams), params,

View File

@@ -1,18 +1,13 @@
#include "integer/scalar_mul.cuh"
uint64_t scratch_cuda_integer_scalar_mul_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t num_scalar_bits,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t num_scalar_bits, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_scalar_mul<uint64_t>(
CudaStreams(streams), (int_scalar_mul_buffer<uint64_t> **)mem_ptr,

View File

@@ -1,18 +1,13 @@
#include "scalar_rotate.cuh"
uint64_t scratch_cuda_scalar_rotate_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_scalar_rotate<uint64_t>(
CudaStreams(streams),

View File

@@ -1,18 +1,13 @@
#include "scalar_shifts.cuh"
uint64_t scratch_cuda_logical_scalar_shift_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_logical_scalar_shift<uint64_t>(
CudaStreams(streams),
@@ -35,18 +30,13 @@ void cuda_logical_scalar_shift_64_inplace_async(
}
uint64_t scratch_cuda_arithmetic_scalar_shift_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_arithmetic_scalar_shift<uint64_t>(
CudaStreams(streams),

View File

@@ -1,18 +1,13 @@
#include "shift_and_rotate.cuh"
uint64_t scratch_cuda_shift_and_rotate_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type, bool is_signed,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, SHIFT_OR_ROTATE_TYPE shift_type, bool is_signed,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_shift_and_rotate<uint64_t>(
CudaStreams(streams), (int_shift_and_rotate_buffer<uint64_t> **)mem_ptr,

View File

@@ -1,18 +1,13 @@
#include "subtraction.cuh"
uint64_t scratch_cuda_sub_and_propagate_single_carry_64_inplace_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t requested_flag, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_sub_and_propagate_single_carry<uint64_t>(
CudaStreams(streams), (int_sub_and_propagate<uint64_t> **)mem_ptr,

View File

@@ -1,18 +1,13 @@
#include "integer/vector_comparison.cuh"
uint64_t scratch_cuda_unchecked_all_eq_slices_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_all_eq_slices<uint64_t>(
CudaStreams(streams),
@@ -50,18 +45,13 @@ void cleanup_cuda_unchecked_all_eq_slices_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_unchecked_contains_sub_slice_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_lhs, uint32_t num_rhs, uint32_t num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_lhs, uint32_t num_rhs,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_contains_sub_slice<uint64_t>(
CudaStreams(streams),

View File

@@ -1,19 +1,14 @@
#include "integer/vector_find.cuh"
uint64_t scratch_cuda_unchecked_match_value_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_matches, uint32_t num_input_blocks,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_matches, uint32_t num_input_blocks,
uint32_t num_output_packed_blocks, uint32_t max_output_is_zero,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_match_value<uint64_t>(
CudaStreams(streams), (int_unchecked_match_buffer<uint64_t> **)mem_ptr,
@@ -56,20 +51,15 @@ void cleanup_cuda_unchecked_match_value_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_unchecked_match_value_or_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_matches, uint32_t num_input_blocks,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_matches, uint32_t num_input_blocks,
uint32_t num_match_packed_blocks, uint32_t num_final_blocks,
uint32_t max_output_is_zero, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_match_value_or<uint64_t>(
CudaStreams(streams),
@@ -107,18 +97,13 @@ void cleanup_cuda_unchecked_match_value_or_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_unchecked_contains_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_contains<uint64_t>(
CudaStreams(streams), (int_unchecked_contains_buffer<uint64_t> **)mem_ptr,
@@ -157,18 +142,13 @@ void cleanup_cuda_unchecked_contains_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_unchecked_contains_clear_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_contains_clear<uint64_t>(
CudaStreams(streams),
@@ -202,18 +182,13 @@ void cleanup_cuda_unchecked_contains_clear_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_unchecked_is_in_clears_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_clears, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_clears, uint32_t num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_is_in_clears<uint64_t>(
CudaStreams(streams),
@@ -247,18 +222,13 @@ void cleanup_cuda_unchecked_is_in_clears_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_unchecked_index_in_clears_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_clears, uint32_t num_blocks, uint32_t num_blocks_index,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_clears, uint32_t num_blocks,
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_index_in_clears<uint64_t>(
CudaStreams(streams),
@@ -299,18 +269,13 @@ void cleanup_cuda_unchecked_index_in_clears_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_unchecked_first_index_in_clears_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_unique, uint32_t num_blocks, uint32_t num_blocks_index,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_unique, uint32_t num_blocks,
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_first_index_in_clears<uint64_t>(
CudaStreams(streams),
@@ -351,18 +316,13 @@ void cleanup_cuda_unchecked_first_index_in_clears_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_unchecked_first_index_of_clear_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_first_index_of_clear<uint64_t>(
CudaStreams(streams),
@@ -403,18 +363,13 @@ void cleanup_cuda_unchecked_first_index_of_clear_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_unchecked_first_index_of_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_first_index_of<uint64_t>(
CudaStreams(streams),
@@ -455,18 +410,13 @@ void cleanup_cuda_unchecked_first_index_of_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_unchecked_index_of_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_index_of<uint64_t>(
CudaStreams(streams), (int_unchecked_index_of_buffer<uint64_t> **)mem_ptr,
@@ -508,18 +458,13 @@ void cleanup_cuda_unchecked_index_of_64(CudaStreamsFFI streams,
}
uint64_t scratch_cuda_unchecked_index_of_clear_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_inputs, uint32_t num_blocks, uint32_t num_blocks_index,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t num_inputs, uint32_t num_blocks,
uint32_t num_blocks_index, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_unchecked_index_of_clear<uint64_t>(
CudaStreams(streams),

View File

@@ -2,18 +2,14 @@
#include "kreyvium.cuh"
uint64_t scratch_cuda_kreyvium_generate_keystream_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_inputs) {
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
uint32_t num_inputs) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_kreyvium_encrypt<uint64_t>(
CudaStreams(streams), (int_kreyvium_buffer<uint64_t> **)mem_ptr, params,

View File

@@ -2,18 +2,14 @@
#include "trivium.cuh"
uint64_t scratch_cuda_trivium_generate_keystream_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_inputs) {
CudaStreamsFFI streams, int8_t **mem_ptr,
CudaLweBootstrapKeyParamsFFI bsk_params, uint32_t ks_level,
uint32_t ks_base_log, uint32_t message_modulus, uint32_t carry_modulus,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type,
uint32_t num_inputs) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
int_radix_params params(bsk_params, ks_level, ks_base_log, message_modulus,
carry_modulus, noise_reduction_type);
return scratch_cuda_trivium_encrypt<uint64_t>(
CudaStreams(streams), (int_trivium_buffer<uint64_t> **)mem_ptr, params,

File diff suppressed because it is too large Load Diff

View File

@@ -24,7 +24,7 @@ bindgen.workspace = true
[dependencies]
ark-ec.workspace = true
ark-ff.workspace = true
tfhe-cuda-backend = { version = "0.14.0", path = "../tfhe-cuda-backend" }
tfhe-cuda-backend = { version = "0.15.0", path = "../tfhe-cuda-backend" }
[features]
default = []

View File

@@ -26,7 +26,7 @@ num-bigint = "0.4.5"
tfhe-versionable = { version = "0.7.0", path = "../utils/tfhe-versionable" }
tfhe-safe-serialize = { version = "0.1.0", path = "../utils/tfhe-safe-serialize" }
zk-cuda-backend = { version = "0.1.0", path = "../backends/zk-cuda-backend", optional = true }
tfhe-cuda-backend = { version = "=0.14.0", path = "../backends/tfhe-cuda-backend", optional = true }
tfhe-cuda-backend = { version = "=0.15.0", path = "../backends/tfhe-cuda-backend", optional = true }
itertools.workspace = true
[target.'cfg(target_family = "wasm")'.dependencies]
getrandom = { workspace = true, features = ["js"] }

View File

@@ -66,7 +66,7 @@ tfhe-fft = { version = "0.10.1", path = "../tfhe-fft", features = [
] }
tfhe-ntt = { version = "0.7.1", path = "../tfhe-ntt" }
pulp = { workspace = true, features = ["default"] }
tfhe-cuda-backend = { version = "0.14.0", path = "../backends/tfhe-cuda-backend", optional = true }
tfhe-cuda-backend = { version = "0.15.0", path = "../backends/tfhe-cuda-backend", optional = true }
aligned-vec = { workspace = true, features = ["default", "serde"] }
dyn-stack = { workspace = true, features = ["default"] }
paste = { workspace = true }

View File

@@ -5,6 +5,11 @@ use crate::core_crypto::prelude::{
GlweDimension, LweBootstrapKey, LweDimension, PolynomialSize, UnsignedInteger,
};
use crate::shortint::server_key::ModulusSwitchConfiguration;
use tfhe_cuda_backend::bindings::{CudaLweBootstrapKeyParamsFFI, PBS_TYPE_CLASSICAL};
/// Implemented by GPU bootstrap key types that can describe their parameters as a
/// [`CudaLweBootstrapKeyParamsFFI`] value from the CUDA backend bindings.
pub(crate) trait CudaBskParams {
/// Builds the [`CudaLweBootstrapKeyParamsFFI`] describing `self`.
fn params_ffi(&self) -> CudaLweBootstrapKeyParamsFFI;
}
#[derive(Clone, Debug)]
pub enum CudaModulusSwitchNoiseReductionConfiguration {
@@ -129,3 +134,23 @@ impl CudaLweBootstrapKey {
self.decomp_level_count
}
}
impl CudaBskParams for CudaLweBootstrapKey {
    /// Packs the parameters of this bootstrap key into a
    /// [`CudaLweBootstrapKeyParamsFFI`].
    ///
    /// `big_lwe_dimension` is not stored on the key: it is derived from
    /// `glwe_dimension` and `polynomial_size` through
    /// `to_equivalent_lwe_dimension`. `pbs_type` is always
    /// `PBS_TYPE_CLASSICAL` and `grouping_factor` is always `0`.
    ///
    /// # Panics
    ///
    /// Panics if any of the converted parameters does not fit in a `u32`.
    fn params_ffi(&self) -> CudaLweBootstrapKeyParamsFFI {
        let big_lwe_dimension = self
            .glwe_dimension
            .to_equivalent_lwe_dimension(self.polynomial_size);

        CudaLweBootstrapKeyParamsFFI {
            input_lwe_dimension: u32::try_from(self.input_lwe_dimension.0)
                .expect("BSK input LWE dimension must fit in u32"),
            glwe_dimension: u32::try_from(self.glwe_dimension.0)
                .expect("BSK GLWE dimension must fit in u32"),
            polynomial_size: u32::try_from(self.polynomial_size.0)
                .expect("BSK polynomial size must fit in u32"),
            base_log: u32::try_from(self.decomp_base_log.0)
                .expect("BSK decomposition base log must fit in u32"),
            level_count: u32::try_from(self.decomp_level_count.0)
                .expect("BSK decomposition level count must fit in u32"),
            big_lwe_dimension: u32::try_from(big_lwe_dimension.0)
                .expect("BSK big LWE dimension must fit in u32"),
            pbs_type: PBS_TYPE_CLASSICAL,
            // No grouping factor is read from this key type; 0 is passed instead.
            grouping_factor: 0,
        }
    }
}

View File

@@ -1,3 +1,4 @@
use crate::core_crypto::gpu::entities::lwe_bootstrap_key::CudaBskParams;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::gpu::{
convert_lwe_multi_bit_programmable_bootstrap_key_async, CudaStreams,
@@ -7,6 +8,7 @@ use crate::core_crypto::prelude::{
GlweDimension, LweBskGroupingFactor, LweDimension, LweMultiBitBootstrapKey, PolynomialSize,
UnsignedInteger,
};
use tfhe_cuda_backend::bindings::{CudaLweBootstrapKeyParamsFFI, PBS_TYPE_MULTI_BIT};
/// A structure representing a vector of GLWE ciphertexts with 64 bits of precision on the GPU.
#[derive(Debug)]
@@ -104,3 +106,23 @@ impl<Scalar: UnsignedInteger> CudaLweMultiBitBootstrapKey<Scalar> {
self.grouping_factor
}
}
impl<Scalar: UnsignedInteger> CudaBskParams for CudaLweMultiBitBootstrapKey<Scalar> {
    /// Packs the parameters of this multi-bit bootstrap key into a
    /// [`CudaLweBootstrapKeyParamsFFI`].
    ///
    /// `big_lwe_dimension` is not stored on the key: it is derived from
    /// `glwe_dimension` and `polynomial_size` through
    /// `to_equivalent_lwe_dimension`. `pbs_type` is always
    /// `PBS_TYPE_MULTI_BIT` and `grouping_factor` is taken from the key.
    ///
    /// # Panics
    ///
    /// Panics if any of the converted parameters does not fit in a `u32`.
    fn params_ffi(&self) -> CudaLweBootstrapKeyParamsFFI {
        let big_lwe_dimension = self
            .glwe_dimension
            .to_equivalent_lwe_dimension(self.polynomial_size);

        CudaLweBootstrapKeyParamsFFI {
            input_lwe_dimension: u32::try_from(self.input_lwe_dimension.0)
                .expect("multi-bit BSK input LWE dimension must fit in u32"),
            glwe_dimension: u32::try_from(self.glwe_dimension.0)
                .expect("multi-bit BSK GLWE dimension must fit in u32"),
            polynomial_size: u32::try_from(self.polynomial_size.0)
                .expect("multi-bit BSK polynomial size must fit in u32"),
            base_log: u32::try_from(self.decomp_base_log.0)
                .expect("multi-bit BSK decomposition base log must fit in u32"),
            level_count: u32::try_from(self.decomp_level_count.0)
                .expect("multi-bit BSK decomposition level count must fit in u32"),
            big_lwe_dimension: u32::try_from(big_lwe_dimension.0)
                .expect("multi-bit BSK big LWE dimension must fit in u32"),
            pbs_type: PBS_TYPE_MULTI_BIT,
            grouping_factor: u32::try_from(self.grouping_factor.0)
                .expect("multi-bit BSK grouping factor must fit in u32"),
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@ use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::packed_integers::PackedIntegers;
use crate::core_crypto::prelude::{
glwe_mask_size, CiphertextModulus, CiphertextModulusLog, GlweCiphertextCount,
LweBskGroupingFactor, LweCiphertextCount, PolynomialSize, UnsignedInteger,
LweCiphertextCount, PolynomialSize, UnsignedInteger,
};
use crate::error;
use crate::high_level_api::keys::expanded::ExpandedDecompressionKey;
@@ -18,7 +18,7 @@ use crate::integer::gpu::ciphertext::CudaRadixCiphertext;
use crate::integer::gpu::server_key::CudaBootstrappingKey;
use crate::integer::gpu::{
cuda_backend_compress, cuda_backend_decompress, cuda_backend_get_compression_size_on_gpu,
cuda_backend_get_decompression_size_on_gpu, extract_glwe, PBSType,
cuda_backend_get_decompression_size_on_gpu, extract_glwe,
};
use crate::prelude::CastInto;
use crate::shortint::ciphertext::{
@@ -527,11 +527,7 @@ impl CudaDecompressionKey {
encryption_polynomial_size,
compression_glwe_dimension,
compression_polynomial_size,
lwe_dimension,
bsk.decomp_base_log(),
bsk.decomp_level_count(),
LweBskGroupingFactor(0),
PBSType::Classical,
bsk,
indexes_array.as_slice(),
indexes_array_len.0 as u32,
);
@@ -560,11 +556,7 @@ impl CudaDecompressionKey {
encryption_polynomial_size,
compression_glwe_dimension,
compression_polynomial_size,
lwe_dimension,
bsk.decomp_base_log(),
bsk.decomp_level_count(),
bsk.grouping_factor,
PBSType::MultiBit,
bsk,
indexes_array.as_slice(),
indexes_array_len.0 as u32,
);
@@ -630,8 +622,6 @@ impl CudaDecompressionKey {
bsk.ms_noise_reduction_configuration.is_none(),
"Decompression key should not do modulus switch noise reduction"
);
let lwe_dimension = bsk.output_lwe_dimension();
cuda_backend_get_decompression_size_on_gpu(
streams,
message_modulus,
@@ -640,33 +630,21 @@ impl CudaDecompressionKey {
encryption_polynomial_size,
compression_glwe_dimension,
compression_polynomial_size,
lwe_dimension,
bsk.decomp_base_log(),
bsk.decomp_level_count(),
LweBskGroupingFactor(0),
PBSType::Classical,
indexes_array_len.0 as u32,
)
}
CudaBootstrappingKey::MultiBit(bsk) => {
let lwe_dimension = bsk.output_lwe_dimension();
cuda_backend_get_decompression_size_on_gpu(
streams,
message_modulus,
carry_modulus,
encryption_glwe_dimension,
encryption_polynomial_size,
compression_glwe_dimension,
compression_polynomial_size,
lwe_dimension,
bsk.decomp_base_log(),
bsk.decomp_level_count(),
bsk.grouping_factor,
PBSType::MultiBit,
bsk,
indexes_array_len.0 as u32,
)
}
CudaBootstrappingKey::MultiBit(bsk) => cuda_backend_get_decompression_size_on_gpu(
streams,
message_modulus,
carry_modulus,
encryption_glwe_dimension,
encryption_polynomial_size,
compression_glwe_dimension,
compression_polynomial_size,
bsk,
indexes_array_len.0 as u32,
),
}
}
pub fn get_cpu_list_unpack_size_on_gpu(
@@ -703,8 +681,6 @@ impl CudaDecompressionKey {
bsk.ms_noise_reduction_configuration.is_none(),
"Decompression key should not do modulus switch noise reduction"
);
let lwe_dimension = bsk.output_lwe_dimension();
cuda_backend_get_decompression_size_on_gpu(
streams,
message_modulus,
@@ -713,33 +689,21 @@ impl CudaDecompressionKey {
encryption_polynomial_size,
compression_glwe_dimension,
compression_polynomial_size,
lwe_dimension,
bsk.decomp_base_log(),
bsk.decomp_level_count(),
LweBskGroupingFactor(0),
PBSType::Classical,
indexes_array_len.0 as u32,
)
}
CudaBootstrappingKey::MultiBit(bsk) => {
let lwe_dimension = bsk.output_lwe_dimension();
cuda_backend_get_decompression_size_on_gpu(
streams,
message_modulus,
carry_modulus,
encryption_glwe_dimension,
encryption_polynomial_size,
compression_glwe_dimension,
compression_polynomial_size,
lwe_dimension,
bsk.decomp_base_log(),
bsk.decomp_level_count(),
bsk.grouping_factor,
PBSType::MultiBit,
bsk,
indexes_array_len.0 as u32,
)
}
CudaBootstrappingKey::MultiBit(bsk) => cuda_backend_get_decompression_size_on_gpu(
streams,
message_modulus,
carry_modulus,
encryption_glwe_dimension,
encryption_polynomial_size,
compression_glwe_dimension,
compression_polynomial_size,
bsk,
indexes_array_len.0 as u32,
),
}
}
}

View File

@@ -1,10 +1,9 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
use crate::integer::gpu::cuda_backend_unchecked_signed_abs_assign;
use crate::integer::gpu::server_key::{
CudaBootstrappingKey, CudaDynamicKeyswitchingKey, CudaServerKey,
};
use crate::integer::gpu::{cuda_backend_unchecked_signed_abs_assign, PBSType};
impl CudaServerKey {
pub fn unchecked_abs_assign<T>(&self, ct: &mut T, streams: &CudaStreams)
@@ -20,6 +19,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_signed_abs_assign(
streams,
ct.as_mut(),
@@ -27,21 +38,26 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
num_blocks,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_signed_abs_assign(
streams,
ct.as_mut(),
@@ -49,17 +65,10 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
num_blocks,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}

View File

@@ -1,5 +1,4 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
use crate::integer::gpu::ciphertext::{
CudaIntegerRadixCiphertext, CudaRadixCiphertext, CudaSignedRadixCiphertext,
@@ -12,7 +11,7 @@ use crate::integer::gpu::{
cuda_backend_add_and_propagate_single_carry_assign,
cuda_backend_get_add_and_propagate_single_carry_assign_size_on_gpu,
cuda_backend_get_full_propagate_assign_size_on_gpu, cuda_backend_unchecked_add_assign,
cuda_backend_unchecked_partial_sum_ciphertexts_assign, PBSType,
cuda_backend_unchecked_partial_sum_ciphertexts_assign,
};
use crate::integer::server_key::radix_parallel::OutputFlag;
use crate::shortint::ciphertext::NoiseLevel;
@@ -137,34 +136,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -184,18 +171,12 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_add_and_propagate_single_carry_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
num_blocks,
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
OutputFlag::None,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
@@ -203,18 +184,12 @@ impl CudaServerKey {
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_add_and_propagate_single_carry_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
num_blocks,
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
OutputFlag::None,
None,
)
@@ -344,6 +319,11 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_unchecked_partial_sum_ciphertexts_assign(
streams,
result.as_mut(),
@@ -353,21 +333,20 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
num_blocks.0 as u32,
radix_count_in_vec as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_unchecked_partial_sum_ciphertexts_assign(
streams,
result.as_mut(),
@@ -377,17 +356,11 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
num_blocks.0 as u32,
radix_count_in_vec as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -714,18 +687,12 @@ impl CudaServerKey {
in_carry,
&d_bsk.d_vec,
&computing_ks_key.d_vec,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
num_blocks,
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
requested_flag,
uses_carry,
d_bsk.ms_noise_reduction_configuration.as_ref(),
@@ -740,18 +707,12 @@ impl CudaServerKey {
in_carry,
&d_multibit_bsk.d_vec,
&computing_ks_key.d_vec,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
num_blocks,
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
requested_flag,
uses_carry,
None,

View File

@@ -6,11 +6,9 @@ use crate::integer::gpu::server_key::{
CudaBootstrappingKey, CudaDynamicKeyswitchingKey, CudaServerKey,
};
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::gpu::{
cuda_backend_aes_key_expansion, cuda_backend_get_aes_ctr_encrypt_size_on_gpu,
cuda_backend_get_aes_key_expansion_size_on_gpu, cuda_backend_unchecked_aes_ctr_encrypt,
PBSType,
};
use crate::integer::{RadixCiphertext, RadixClientKey};
use crate::shortint::Ciphertext;
@@ -292,15 +290,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
@@ -317,15 +309,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
);
}
@@ -351,15 +337,9 @@ impl CudaServerKey {
sbox_parallelism as u32,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
@@ -369,15 +349,9 @@ impl CudaServerKey {
sbox_parallelism as u32,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
)
}
@@ -417,15 +391,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
@@ -438,15 +406,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
);
}
@@ -465,15 +427,9 @@ impl CudaServerKey {
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
@@ -481,15 +437,9 @@ impl CudaServerKey {
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
)
}

View File

@@ -6,10 +6,9 @@ use crate::integer::gpu::server_key::{
CudaBootstrappingKey, CudaDynamicKeyswitchingKey, CudaServerKey,
};
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::gpu::{
cuda_backend_aes_key_expansion_256, cuda_backend_get_aes_key_expansion_256_size_on_gpu,
cuda_backend_unchecked_aes_ctr_256_encrypt, PBSType,
cuda_backend_unchecked_aes_ctr_256_encrypt,
};
use crate::integer::{RadixCiphertext, RadixClientKey};
@@ -218,15 +217,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
@@ -243,15 +236,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
);
}
@@ -295,15 +282,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
@@ -316,15 +297,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
);
}
@@ -344,15 +319,9 @@ impl CudaServerKey {
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
@@ -361,15 +330,9 @@ impl CudaServerKey {
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
)
}

View File

@@ -1,5 +1,4 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaRadixCiphertext};
use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaDynamicKeyswitchingKey};
@@ -8,7 +7,7 @@ use crate::integer::gpu::{
cuda_backend_get_bitop_size_on_gpu, cuda_backend_get_boolean_bitnot_size_on_gpu,
cuda_backend_get_boolean_bitop_size_on_gpu, cuda_backend_get_full_propagate_assign_size_on_gpu,
cuda_backend_unchecked_bitnot_assign, cuda_backend_unchecked_bitop_assign, BitOpType,
CudaServerKey, PBSType,
CudaServerKey,
};
impl CudaServerKey {
@@ -331,6 +330,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_boolean_bitop_assign(
streams,
ct_left.0.as_mut(),
@@ -339,23 +350,28 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
op,
is_unchecked,
1u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_boolean_bitop_assign(
streams,
ct_left.0.as_mut(),
@@ -364,19 +380,12 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
op,
is_unchecked,
1u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -423,16 +432,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk.output_lwe_dimension(),
d_bsk.input_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
@@ -445,16 +447,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk.output_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -541,6 +536,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_bitop_assign(
streams,
ct_left.as_mut(),
@@ -549,22 +556,27 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
op,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_bitop_assign(
streams,
ct_left.as_mut(),
@@ -573,18 +585,11 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
op,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -612,43 +617,55 @@ impl CudaServerKey {
};
let boolean_bitop_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_boolean_bitop_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
op,
false,
1u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_boolean_bitop_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
op,
false,
1u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_boolean_bitop_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
op,
false,
1u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -679,34 +696,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -724,42 +729,56 @@ impl CudaServerKey {
let lwe_ciphertext_count = ct_left.as_ref().d_blocks.lwe_ciphertext_count();
let bitop_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_bitop_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
op,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => cuda_backend_get_bitop_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
op,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
),
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_bitop_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
op,
lwe_ciphertext_count.0 as u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_bitop_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
op,
lwe_ciphertext_count.0 as u32,
None,
)
}
};
actual_full_prop_mem.max(bitop_mem)
}
@@ -1280,41 +1299,53 @@ impl CudaServerKey {
};
let boolean_bitnot_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_boolean_bitnot_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
false,
1u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_boolean_bitnot_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
false,
1u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_boolean_bitnot_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
false,
1u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -1339,34 +1370,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}

View File

@@ -1,11 +1,10 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaDynamicKeyswitchingKey};
use crate::integer::gpu::{
cuda_backend_get_cmux_size_on_gpu, cuda_backend_get_full_propagate_assign_size_on_gpu,
cuda_backend_unchecked_cmux, CudaServerKey, PBSType,
cuda_backend_unchecked_cmux, CudaServerKey,
};
impl CudaServerKey {
@@ -27,6 +26,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_cmux(
stream,
result.as_mut(),
@@ -37,21 +48,26 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_cmux(
stream,
result.as_mut(),
@@ -62,17 +78,10 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -133,34 +142,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -178,40 +175,54 @@ impl CudaServerKey {
let lwe_ciphertext_count = true_ct.as_ref().d_blocks.lwe_ciphertext_count();
let cmux_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_cmux_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => cuda_backend_get_cmux_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
),
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_cmux_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
lwe_ciphertext_count.0 as u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_cmux_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
lwe_ciphertext_count.0 as u32,
None,
)
}
};
actual_full_prop_mem.max(cmux_mem)
}

View File

@@ -1,13 +1,13 @@
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{LweBskGroupingFactor, LweCiphertextCount};
use crate::core_crypto::prelude::LweCiphertextCount;
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
use crate::integer::gpu::ciphertext::info::CudaRadixCiphertextInfo;
use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaRadixCiphertext};
use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaDynamicKeyswitchingKey};
use crate::integer::gpu::{
cuda_backend_get_comparison_size_on_gpu, cuda_backend_get_full_propagate_assign_size_on_gpu,
cuda_backend_unchecked_comparison, ComparisonType, CudaServerKey, PBSType,
cuda_backend_unchecked_comparison, ComparisonType, CudaServerKey,
};
use crate::shortint::ciphertext::Degree;
@@ -52,6 +52,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_comparison(
streams,
result.as_mut().as_mut(),
@@ -61,22 +73,27 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
op,
T::IS_SIGNED,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_comparison(
streams,
result.as_mut().as_mut(),
@@ -86,18 +103,11 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
op,
T::IS_SIGNED,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -230,34 +240,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -278,43 +276,55 @@ impl CudaServerKey {
};
let comparison_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_comparison_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
op,
T::IS_SIGNED,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_comparison_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
lwe_ciphertext_count.0 as u32,
op,
T::IS_SIGNED,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_comparison_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
op,
T::IS_SIGNED,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -824,6 +834,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_comparison(
streams,
result.as_mut(),
@@ -833,22 +855,27 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
ComparisonType::MAX,
T::IS_SIGNED,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_comparison(
streams,
result.as_mut(),
@@ -858,18 +885,11 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
ComparisonType::MAX,
T::IS_SIGNED,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -900,6 +920,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_comparison(
streams,
result.as_mut(),
@@ -909,22 +941,27 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
ComparisonType::MIN,
T::IS_SIGNED,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_comparison(
streams,
result.as_mut(),
@@ -934,18 +971,11 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
ComparisonType::MIN,
T::IS_SIGNED,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}

View File

@@ -1,12 +1,11 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
use crate::integer::gpu::server_key::{
CudaBootstrappingKey, CudaDynamicKeyswitchingKey, CudaServerKey,
};
use crate::integer::gpu::{
cuda_backend_get_div_rem_size_on_gpu, cuda_backend_get_full_propagate_assign_size_on_gpu,
cuda_backend_unchecked_div_rem_assign, PBSType,
cuda_backend_unchecked_div_rem_assign,
};
impl CudaServerKey {
@@ -29,6 +28,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_div_rem_assign(
streams,
quotient.as_mut(),
@@ -40,21 +51,26 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
num_blocks,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_div_rem_assign(
streams,
quotient.as_mut(),
@@ -66,17 +82,10 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
num_blocks,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -233,34 +242,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -278,42 +275,56 @@ impl CudaServerKey {
let lwe_ciphertext_count = numerator.as_ref().d_blocks.lwe_ciphertext_count();
let mul_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_div_rem_size_on_gpu(
streams,
T::IS_SIGNED,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => cuda_backend_get_div_rem_size_on_gpu(
streams,
T::IS_SIGNED,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
),
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_div_rem_size_on_gpu(
streams,
T::IS_SIGNED,
self.message_modulus,
self.carry_modulus,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
lwe_ciphertext_count.0 as u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_div_rem_size_on_gpu(
streams,
T::IS_SIGNED,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
lwe_ciphertext_count.0 as u32,
None,
)
}
};
actual_full_prop_mem.max(mul_mem)
}

View File

@@ -1,5 +1,4 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
use crate::integer::gpu::ciphertext::{
CudaIntegerRadixCiphertext, CudaSignedRadixCiphertext, CudaUnsignedRadixCiphertext,
@@ -7,7 +6,7 @@ use crate::integer::gpu::ciphertext::{
use crate::integer::gpu::server_key::{
CudaBootstrappingKey, CudaDynamicKeyswitchingKey, CudaServerKey,
};
use crate::integer::gpu::{cuda_backend_count_of_consecutive_bits, cuda_backend_ilog2, PBSType};
use crate::integer::gpu::{cuda_backend_count_of_consecutive_bits, cuda_backend_ilog2};
use crate::integer::server_key::radix_parallel::ilog2::{BitValue, Direction};
impl CudaServerKey {
@@ -49,17 +48,11 @@ impl CudaServerKey {
ct.as_ref(),
&d_bsk.d_vec,
&computing_ks_key.d_vec,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
direction as u32,
bit_value as u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
@@ -72,17 +65,11 @@ impl CudaServerKey {
ct.as_ref(),
&d_multibit_bsk.d_vec,
&computing_ks_key.d_vec,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
direction as u32,
bit_value as u32,
None,
@@ -212,17 +199,11 @@ impl CudaServerKey {
trivial_ct_m_minus_1_block.as_ref(),
&d_bsk.d_vec,
&computing_ks_key.d_vec,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
LweBskGroupingFactor(0),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
input_num_blocks as u32,
counter_num_blocks as u32,
num_bits_in_ciphertext,
@@ -239,17 +220,11 @@ impl CudaServerKey {
trivial_ct_m_minus_1_block.as_ref(),
&d_multibit_bsk.d_vec,
&computing_ks_key.d_vec,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
d_multibit_bsk.grouping_factor,
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
input_num_blocks as u32,
counter_num_blocks as u32,
num_bits_in_ciphertext,

View File

@@ -1,10 +1,9 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext};
use crate::integer::gpu::cuda_backend_kreyvium_generate_keystream;
use crate::integer::gpu::server_key::{
CudaBootstrappingKey, CudaDynamicKeyswitchingKey, CudaServerKey,
};
use crate::integer::gpu::{cuda_backend_kreyvium_generate_keystream, PBSType};
use crate::shortint::parameters::LweBskGroupingFactor;
impl CudaServerKey {
pub fn kreyvium_generate_keystream(
@@ -63,15 +62,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
num_steps as u32,
);
@@ -86,15 +79,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
num_steps as u32,
);

View File

@@ -2,9 +2,7 @@ use crate::core_crypto::entities::{Cleartext, GlweCiphertext, LweCiphertextList}
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{
ContiguousEntityContainerMut, LweBskGroupingFactor, LweCiphertextCount,
};
use crate::core_crypto::prelude::{ContiguousEntityContainerMut, LweCiphertextCount};
use crate::integer::block_decomposition::{BlockDecomposer, DecomposableInto};
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
use crate::integer::gpu::ciphertext::info::{CudaBlockInfo, CudaRadixCiphertextInfo};
@@ -20,7 +18,7 @@ use crate::integer::gpu::{
cuda_backend_cast_to_signed, cuda_backend_cast_to_unsigned,
cuda_backend_extend_radix_with_trivial_zero_blocks_msb, cuda_backend_full_propagate_assign,
cuda_backend_noise_squashing, cuda_backend_propagate_single_carry_assign,
cuda_backend_trim_radix_blocks_lsb, cuda_backend_trim_radix_blocks_msb, CudaServerKey, PBSType,
cuda_backend_trim_radix_blocks_lsb, cuda_backend_trim_radix_blocks_msb, CudaServerKey,
};
use crate::integer::server_key::radix_parallel::OutputFlag;
use crate::shortint::ciphertext::{Degree, NoiseLevel};
@@ -255,18 +253,12 @@ impl CudaServerKey {
in_carry,
&d_bsk.d_vec,
&computing_ks_key.d_vec,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
num_blocks,
ciphertext.info.blocks.first().unwrap().message_modulus,
ciphertext.info.blocks.first().unwrap().carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
requested_flag,
uses_carry,
d_bsk.ms_noise_reduction_configuration.as_ref(),
@@ -280,18 +272,12 @@ impl CudaServerKey {
in_carry,
&d_multibit_bsk.d_vec,
&computing_ks_key.d_vec,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
num_blocks,
ciphertext.info.blocks.first().unwrap().message_modulus,
ciphertext.info.blocks.first().unwrap().carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
requested_flag,
uses_carry,
None,
@@ -321,18 +307,12 @@ impl CudaServerKey {
ciphertext,
&d_bsk.d_vec,
&computing_ks_key.d_vec,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
num_blocks,
ciphertext.info.blocks.first().unwrap().message_modulus,
ciphertext.info.blocks.first().unwrap().carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
@@ -342,18 +322,12 @@ impl CudaServerKey {
ciphertext,
&d_multibit_bsk.d_vec,
&computing_ks_key.d_vec,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
num_blocks,
ciphertext.info.blocks.first().unwrap().message_modulus,
ciphertext.info.blocks.first().unwrap().carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -711,6 +685,11 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_apply_univariate_lut(
streams,
&mut output_slice,
@@ -721,22 +700,21 @@ impl CudaServerKey {
lut.degree.0,
&d_bsk.d_vec,
&computing_ks_key.d_vec,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
num_ct_blocks,
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_apply_univariate_lut(
streams,
&mut output_slice,
@@ -747,18 +725,12 @@ impl CudaServerKey {
lut.degree.0,
&d_multibit_bsk.d_vec,
&computing_ks_key.d_vec,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
num_ct_blocks,
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -881,6 +853,11 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_apply_many_univariate_lut(
streams,
&mut output_slice,
@@ -891,24 +868,23 @@ impl CudaServerKey {
lut.input_max_degree.0,
&d_bsk.d_vec,
&computing_ks_key.d_vec,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
num_ct_blocks as u32,
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
function_count as u32,
lut.sample_extraction_stride as u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_apply_many_univariate_lut(
streams,
&mut output_slice,
@@ -919,18 +895,12 @@ impl CudaServerKey {
lut.input_max_degree.0,
&d_multibit_bsk.d_vec,
&computing_ks_key.d_vec,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
num_ct_blocks as u32,
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
function_count as u32,
lut.sample_extraction_stride as u32,
None,
@@ -1021,6 +991,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_cast_to_unsigned(
streams,
result.as_mut(),
@@ -1030,20 +1012,25 @@ impl CudaServerKey {
target_num_blocks as u32,
&d_bsk.d_vec,
&computing_ks_key.d_vec,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_cast_to_unsigned(
streams,
result.as_mut(),
@@ -1053,16 +1040,9 @@ impl CudaServerKey {
target_num_blocks as u32,
&d_multibit_bsk.d_vec,
&computing_ks_key.d_vec,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -1129,6 +1109,11 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_cast_to_signed(
streams,
output_ct.as_mut(),
@@ -1138,19 +1123,18 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_cast_to_signed(
streams,
output_ct.as_mut(),
@@ -1160,15 +1144,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -1217,6 +1195,11 @@ impl CudaServerKey {
unsafe {
match &d_bootstrapping_key {
CudaBootstrappingKey::Classic(bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_noise_squashing(
streams,
&mut output_slice,
@@ -1225,25 +1208,24 @@ impl CudaServerKey {
&input_slice,
&bsk.d_vec,
&computing_ks_key.d_vec,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
bsk.glwe_dimension,
bsk.polynomial_size,
bsk,
input_glwe_dimension,
input_polynomial_size,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
bsk.decomp_level_count,
bsk.decomp_base_log,
num_output_blocks as u32,
input.d_blocks.lwe_ciphertext_count().0 as u32,
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(mb_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
mb_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_noise_squashing(
streams,
&mut output_slice,
@@ -1252,21 +1234,15 @@ impl CudaServerKey {
&input_slice,
&mb_bsk.d_vec,
&computing_ks_key.d_vec,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
mb_bsk.glwe_dimension,
mb_bsk.polynomial_size,
mb_bsk,
input_glwe_dimension,
input_polynomial_size,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
mb_bsk.decomp_level_count,
mb_bsk.decomp_base_log,
num_output_blocks as u32,
input.d_blocks.lwe_ciphertext_count().0 as u32,
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
mb_bsk.grouping_factor,
None,
);
}

View File

@@ -1,12 +1,11 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
use crate::integer::gpu::server_key::{
CudaBootstrappingKey, CudaDynamicKeyswitchingKey, CudaServerKey,
};
use crate::integer::gpu::{
cuda_backend_get_full_propagate_assign_size_on_gpu, cuda_backend_get_mul_size_on_gpu,
cuda_backend_unchecked_mul_assign, PBSType,
cuda_backend_unchecked_mul_assign,
};
impl CudaServerKey {
@@ -93,16 +92,10 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension(),
d_bsk.input_lwe_dimension(),
d_bsk.polynomial_size(),
d_bsk.decomp_base_log(),
d_bsk.decomp_level_count(),
d_bsk,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
num_blocks,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
@@ -117,16 +110,10 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk.decomp_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
num_blocks,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -247,34 +234,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -294,42 +269,44 @@ impl CudaServerKey {
let is_boolean_right = ct_right.holds_boolean_value();
let mul_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_mul_size_on_gpu(
streams,
is_boolean_left,
is_boolean_right,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.polynomial_size,
d_bsk.decomp_base_log,
d_bsk.decomp_level_count,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => cuda_backend_get_mul_size_on_gpu(
streams,
is_boolean_left,
is_boolean_right,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.polynomial_size,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.decomp_level_count,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
),
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_get_mul_size_on_gpu(
streams,
is_boolean_left,
is_boolean_right,
self.message_modulus,
self.carry_modulus,
d_bsk,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
lwe_ciphertext_count.0 as u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_get_mul_size_on_gpu(
streams,
is_boolean_left,
is_boolean_right,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
lwe_ciphertext_count.0 as u32,
None,
)
}
};
actual_full_prop_mem.max(mul_mem)
}

View File

@@ -10,8 +10,6 @@ use crate::integer::gpu::server_key::{
};
use itertools::Itertools;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::shortint::oprf::{create_random_from_seed_modulus_switched, raw_seeded_msed_to_lwe};
use crate::shortint::OprfSeed;
@@ -19,7 +17,7 @@ use crate::core_crypto::gpu::vec::CudaVec;
use crate::integer::block_decomposition::BlockDecomposer;
use crate::integer::gpu::{
cuda_backend_get_grouped_oprf_size_on_gpu, cuda_backend_grouped_oprf,
cuda_backend_grouped_oprf_custom_range, PBSType,
cuda_backend_grouped_oprf_custom_range,
};
pub struct GenericCudaOprfServerKey<K> {
@@ -474,17 +472,11 @@ where
&d_seeded_lwe_input,
num_active_blocks as u32,
&d_bsk.d_vec,
d_bsk.input_lwe_dimension,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
target_sks.message_modulus,
target_sks.carry_modulus,
PBSType::Classical,
message_bits_count,
total_random_bits as u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
@@ -497,17 +489,11 @@ where
&d_seeded_lwe_input,
num_active_blocks as u32,
&d_bsk.d_vec,
d_bsk.input_lwe_dimension,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
d_bsk.grouping_factor,
target_sks.message_modulus,
target_sks.carry_modulus,
PBSType::MultiBit,
message_bits_count,
total_random_bits as u32,
None,
@@ -612,17 +598,11 @@ where
&d_bsk.d_vec,
&compute_d_bsk.d_vec,
&computing_ks_key.d_vec,
d_bsk.input_lwe_dimension,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
target_sks.message_modulus,
target_sks.carry_modulus,
PBSType::Classical,
message_bits_count as u32,
post_mul_num_bits as u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
@@ -643,17 +623,11 @@ where
&d_bsk.d_vec,
&compute_d_bsk.d_vec,
&computing_ks_key.d_vec,
d_bsk.input_lwe_dimension,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
d_bsk.grouping_factor,
target_sks.message_modulus,
target_sks.carry_modulus,
PBSType::MultiBit,
message_bits_count as u32,
post_mul_num_bits as u32,
None,
@@ -685,17 +659,11 @@ where
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_grouped_oprf_size_on_gpu(
streams,
1,
d_bsk.input_lwe_dimension,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
target_sks.message_modulus,
target_sks.carry_modulus,
PBSType::Classical,
message_bits,
message_bits,
d_bsk.ms_noise_reduction_configuration.as_ref(),
@@ -703,17 +671,11 @@ where
CudaBootstrappingKey::MultiBit(d_bsk) => cuda_backend_get_grouped_oprf_size_on_gpu(
streams,
1,
d_bsk.input_lwe_dimension,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
d_bsk.grouping_factor,
target_sks.message_modulus,
target_sks.carry_modulus,
PBSType::MultiBit,
message_bits,
message_bits,
None,

View File

@@ -1,11 +1,10 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext};
use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaDynamicKeyswitchingKey};
use crate::integer::gpu::{
cuda_backend_get_full_propagate_assign_size_on_gpu, cuda_backend_get_rotate_left_size_on_gpu,
cuda_backend_get_rotate_right_size_on_gpu, cuda_backend_unchecked_rotate_left_assign,
cuda_backend_unchecked_rotate_right_assign, CudaServerKey, PBSType,
cuda_backend_unchecked_rotate_right_assign, CudaServerKey,
};
impl CudaServerKey {
@@ -26,6 +25,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_rotate_right_assign(
streams,
ct.as_mut(),
@@ -34,22 +45,27 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
is_signed,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_rotate_right_assign(
streams,
ct.as_mut(),
@@ -58,18 +74,11 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
is_signed,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -108,6 +117,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_rotate_left_assign(
streams,
ct.as_mut(),
@@ -116,22 +137,27 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
is_signed,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_rotate_left_assign(
streams,
ct.as_mut(),
@@ -140,18 +166,11 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
is_signed,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -435,34 +454,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -480,41 +487,53 @@ impl CudaServerKey {
let lwe_ciphertext_count = ct_left.as_ref().d_blocks.lwe_ciphertext_count();
let rotate_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_rotate_left_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
T::IS_SIGNED,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_rotate_left_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
lwe_ciphertext_count.0 as u32,
T::IS_SIGNED,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_rotate_left_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
T::IS_SIGNED,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -544,34 +563,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -589,41 +596,53 @@ impl CudaServerKey {
let lwe_ciphertext_count = ct_left.as_ref().d_blocks.lwe_ciphertext_count();
let rotate_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_rotate_right_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
T::IS_SIGNED,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_rotate_right_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
lwe_ciphertext_count.0 as u32,
T::IS_SIGNED,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_rotate_right_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
T::IS_SIGNED,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}

View File

@@ -1,6 +1,6 @@
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{LweBskGroupingFactor, SignedNumeric};
use crate::core_crypto::prelude::SignedNumeric;
use crate::integer::block_decomposition::{BlockDecomposer, DecomposableInto};
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
use crate::integer::gpu::ciphertext::{
@@ -12,7 +12,7 @@ use crate::integer::gpu::server_key::{
use crate::integer::gpu::{
cuda_backend_get_full_propagate_assign_size_on_gpu,
cuda_backend_get_propagate_single_carry_assign_size_on_gpu,
cuda_backend_scalar_addition_assign, PBSType,
cuda_backend_scalar_addition_assign,
};
use crate::integer::server_key::radix_parallel::OutputFlag;
use crate::prelude::CastInto;
@@ -192,34 +192,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -231,18 +219,12 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_propagate_single_carry_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
num_blocks,
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
OutputFlag::None,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
@@ -250,18 +232,12 @@ impl CudaServerKey {
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_propagate_single_carry_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
num_blocks,
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
OutputFlag::None,
None,
)

View File

@@ -1,12 +1,11 @@
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::block_decomposition::{BlockDecomposer, DecomposableInto};
use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaDynamicKeyswitchingKey};
use crate::integer::gpu::{
cuda_backend_get_full_propagate_assign_size_on_gpu, cuda_backend_get_scalar_bitop_size_on_gpu,
cuda_backend_unchecked_scalar_bitop_assign, BitOpType, CudaServerKey, PBSType,
cuda_backend_unchecked_scalar_bitop_assign, BitOpType, CudaServerKey,
};
impl CudaServerKey {
@@ -36,6 +35,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_bitop_assign(
streams,
ct.as_mut(),
@@ -45,22 +56,27 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
op,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_bitop_assign(
streams,
ct.as_mut(),
@@ -70,18 +86,11 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
op,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -248,34 +257,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -284,41 +281,53 @@ impl CudaServerKey {
let clear_blocks_mem = (lwe_ciphertext_count.0 * size_of::<u64>()) as u64;
let scalar_bitop_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_scalar_bitop_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
op,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_scalar_bitop_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
op,
lwe_ciphertext_count.0 as u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_scalar_bitop_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
op,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}

View File

@@ -1,7 +1,7 @@
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{LweBskGroupingFactor, LweCiphertextCount};
use crate::core_crypto::prelude::LweCiphertextCount;
use crate::integer::block_decomposition::{BlockDecomposer, DecomposableInto};
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
use crate::integer::gpu::ciphertext::info::CudaRadixCiphertextInfo;
@@ -12,7 +12,7 @@ use crate::integer::gpu::server_key::{
use crate::integer::gpu::{
cuda_backend_unchecked_are_all_comparisons_block_true,
cuda_backend_unchecked_is_at_least_one_comparisons_block_true,
cuda_backend_unchecked_scalar_comparison, ComparisonType, PBSType,
cuda_backend_unchecked_scalar_comparison, ComparisonType,
};
use crate::shortint::ciphertext::Degree;
@@ -176,6 +176,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_comparison(
streams,
result.as_mut().as_mut(),
@@ -186,23 +198,28 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
scalar_blocks.len() as u32,
op,
signed_with_positive_scalar,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_comparison(
streams,
result.as_mut().as_mut(),
@@ -213,19 +230,12 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
scalar_blocks.len() as u32,
op,
signed_with_positive_scalar,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -324,6 +334,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_comparison(
streams,
result.as_mut(),
@@ -334,23 +356,28 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
scalar_blocks.len() as u32,
op,
T::IS_SIGNED,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_comparison(
streams,
result.as_mut(),
@@ -361,19 +388,12 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
scalar_blocks.len() as u32,
op,
T::IS_SIGNED,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -399,6 +419,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_are_all_comparisons_block_true(
streams,
boolean_res.as_mut().as_mut(),
@@ -407,20 +439,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_are_all_comparisons_block_true(
streams,
boolean_res.as_mut().as_mut(),
@@ -429,16 +466,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -464,6 +494,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_is_at_least_one_comparisons_block_true(
streams,
boolean_res.as_mut().as_mut(),
@@ -472,20 +514,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_is_at_least_one_comparisons_block_true(
streams,
boolean_res.as_mut().as_mut(),
@@ -494,16 +541,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}

View File

@@ -1,5 +1,4 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::block_decomposition::DecomposableInto;
use crate::integer::gpu::ciphertext::{
CudaIntegerRadixCiphertext, CudaSignedRadixCiphertext, CudaUnsignedRadixCiphertext,
@@ -13,7 +12,7 @@ use crate::integer::gpu::{
cuda_backend_unchecked_signed_scalar_div_assign,
cuda_backend_unchecked_signed_scalar_div_rem_assign,
cuda_backend_unchecked_unsigned_scalar_div_assign,
cuda_backend_unchecked_unsigned_scalar_div_rem, CudaServerKey, PBSType,
cuda_backend_unchecked_unsigned_scalar_div_rem, CudaServerKey,
};
use crate::integer::server_key::radix_parallel::scalar_div_mod::SignedReciprocable;
use crate::integer::server_key::radix_parallel::OutputFlag;
@@ -100,15 +99,9 @@ impl CudaServerKey {
&d_bsk.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
@@ -121,15 +114,9 @@ impl CudaServerKey {
&d_multibit_bsk.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
);
}
@@ -239,15 +226,9 @@ impl CudaServerKey {
&d_bsk.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
@@ -261,15 +242,9 @@ impl CudaServerKey {
&d_multibit_bsk.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
);
}
@@ -441,15 +416,9 @@ impl CudaServerKey {
&d_bsk.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
@@ -462,15 +431,9 @@ impl CudaServerKey {
&d_multibit_bsk.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
);
}
@@ -580,15 +543,9 @@ impl CudaServerKey {
&d_bsk.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
@@ -602,15 +559,9 @@ impl CudaServerKey {
&d_multibit_bsk.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
);
}
@@ -781,34 +732,22 @@ encrypted bits: {numerator_bits}, scalar bits: {}
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -821,16 +760,10 @@ encrypted bits: {numerator_bits}, scalar bits: {}
divisor,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
num_blocks,
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
@@ -839,16 +772,10 @@ encrypted bits: {numerator_bits}, scalar bits: {}
divisor,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
num_blocks,
PBSType::MultiBit,
None,
)
}
@@ -890,16 +817,10 @@ encrypted bits: {numerator_bits}, scalar bits: {}
divisor,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
num_blocks,
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
@@ -908,16 +829,10 @@ encrypted bits: {numerator_bits}, scalar bits: {}
divisor,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
num_blocks,
PBSType::MultiBit,
None,
)
}
@@ -970,16 +885,10 @@ encrypted bits: {numerator_bits}, scalar bits: {}
divisor,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk.decomp_base_log,
d_bsk.decomp_level_count,
d_bsk,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
LweBskGroupingFactor(0),
num_blocks,
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
@@ -988,16 +897,10 @@ encrypted bits: {numerator_bits}, scalar bits: {}
divisor,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.decomp_level_count,
d_multibit_bsk,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
d_multibit_bsk.grouping_factor,
num_blocks,
PBSType::MultiBit,
None,
)
}
@@ -1035,16 +938,10 @@ encrypted bits: {numerator_bits}, scalar bits: {}
divisor,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
num_blocks,
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
@@ -1054,16 +951,10 @@ encrypted bits: {numerator_bits}, scalar bits: {}
divisor,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
num_blocks,
PBSType::MultiBit,
None,
)
}

View File

@@ -1,5 +1,4 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::block_decomposition::{BlockDecomposer, DecomposableInto};
use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
use crate::integer::gpu::server_key::{
@@ -7,7 +6,7 @@ use crate::integer::gpu::server_key::{
};
use crate::integer::gpu::{
cuda_backend_get_full_propagate_assign_size_on_gpu, cuda_backend_get_scalar_mul_size_on_gpu,
cuda_backend_unchecked_scalar_mul, PBSType,
cuda_backend_unchecked_scalar_mul,
};
use crate::integer::server_key::ScalarMultiplier;
use crate::prelude::CastInto;
@@ -116,6 +115,11 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_unchecked_scalar_mul(
streams,
ct.as_mut(),
@@ -125,20 +129,19 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.decomp_base_log,
d_bsk.decomp_level_count,
d_bsk,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
decomposed_scalar.len() as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_unchecked_scalar_mul(
streams,
ct.as_mut(),
@@ -148,16 +151,10 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.decomp_level_count,
d_multibit_bsk,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
decomposed_scalar.len() as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -258,34 +255,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -299,39 +284,39 @@ impl CudaServerKey {
return 0;
}
let scalar_mul_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_scalar_mul_size_on_gpu(
streams,
decomposed_scalar.as_slice(),
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.decomp_base_log,
d_bsk.decomp_level_count,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_get_scalar_mul_size_on_gpu(
streams,
decomposed_scalar.as_slice(),
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.decomp_level_count,
d_bsk,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
lwe_ciphertext_count.0 as u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
cuda_backend_get_scalar_mul_size_on_gpu(
streams,
decomposed_scalar.as_slice(),
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_base_log(),
computing_ks_key.decomposition_level_count(),
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}

View File

@@ -1,5 +1,5 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{CastFrom, LweBskGroupingFactor};
use crate::core_crypto::prelude::CastFrom;
use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaDynamicKeyswitchingKey};
use crate::integer::gpu::{
@@ -7,7 +7,7 @@ use crate::integer::gpu::{
cuda_backend_get_scalar_rotate_left_size_on_gpu,
cuda_backend_unchecked_scalar_rotate_left_assign,
cuda_backend_unchecked_scalar_rotate_right_assign, get_scalar_rotate_right_size_on_gpu,
CudaServerKey, PBSType,
CudaServerKey,
};
impl CudaServerKey {
@@ -45,6 +45,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_rotate_left_assign(
stream,
ct.as_mut(),
@@ -53,21 +65,26 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_rotate_left_assign(
stream,
ct.as_mut(),
@@ -76,17 +93,10 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -128,6 +138,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_rotate_right_assign(
stream,
ct.as_mut(),
@@ -136,21 +158,26 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_rotate_right_assign(
stream,
ct.as_mut(),
@@ -159,17 +186,10 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -240,34 +260,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -275,40 +283,50 @@ impl CudaServerKey {
};
let scalar_shift_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_scalar_rotate_left_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_scalar_rotate_left_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -332,74 +350,76 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
}
};
let scalar_shift_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => get_scalar_rotate_right_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => get_scalar_rotate_right_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
),
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
get_scalar_rotate_right_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
lwe_ciphertext_count.0 as u32,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
get_scalar_rotate_right_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
lwe_ciphertext_count.0 as u32,
None,
)
}
};
full_prop_mem.max(scalar_shift_mem)
}

View File

@@ -1,5 +1,5 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{CastFrom, LweBskGroupingFactor};
use crate::core_crypto::prelude::CastFrom;
use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaDynamicKeyswitchingKey};
use crate::integer::gpu::{
@@ -9,7 +9,7 @@ use crate::integer::gpu::{
cuda_backend_get_scalar_logical_right_shift_size_on_gpu,
cuda_backend_unchecked_scalar_arithmetic_right_shift_assign,
cuda_backend_unchecked_scalar_left_shift_assign,
cuda_backend_unchecked_scalar_logical_right_shift_assign, CudaServerKey, PBSType,
cuda_backend_unchecked_scalar_logical_right_shift_assign, CudaServerKey,
};
impl CudaServerKey {
@@ -83,6 +83,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_left_shift_assign(
streams,
ct.as_mut(),
@@ -91,21 +103,26 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_left_shift_assign(
streams,
ct.as_mut(),
@@ -114,17 +131,10 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -203,6 +213,18 @@ impl CudaServerKey {
if T::IS_SIGNED {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_arithmetic_right_shift_assign(
streams,
ct.as_mut(),
@@ -211,20 +233,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_arithmetic_right_shift_assign(
streams,
ct.as_mut(),
@@ -233,16 +260,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -250,6 +270,18 @@ impl CudaServerKey {
} else {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_logical_right_shift_assign(
streams,
ct.as_mut(),
@@ -258,21 +290,26 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_scalar_logical_right_shift_assign(
streams,
ct.as_mut(),
@@ -281,17 +318,10 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -447,34 +477,34 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -486,35 +516,33 @@ impl CudaServerKey {
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_scalar_left_shift_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -538,34 +566,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -574,40 +590,50 @@ impl CudaServerKey {
let scalar_shift_mem = if T::IS_SIGNED {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_scalar_arithmetic_right_shift_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_scalar_arithmetic_right_shift_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -615,40 +641,50 @@ impl CudaServerKey {
} else {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_scalar_logical_right_shift_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_scalar_logical_right_shift_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}

View File

@@ -1,11 +1,10 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext};
use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaDynamicKeyswitchingKey};
use crate::integer::gpu::{
cuda_backend_get_full_propagate_assign_size_on_gpu, cuda_backend_get_left_shift_size_on_gpu,
cuda_backend_get_right_shift_size_on_gpu, cuda_backend_unchecked_left_shift_assign,
cuda_backend_unchecked_right_shift_assign, CudaServerKey, PBSType,
cuda_backend_unchecked_right_shift_assign, CudaServerKey,
};
impl CudaServerKey {
@@ -26,6 +25,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_right_shift_assign(
streams,
ct.as_mut(),
@@ -34,22 +45,27 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
is_signed,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_right_shift_assign(
streams,
ct.as_mut(),
@@ -58,18 +74,11 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
is_signed,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -108,6 +117,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_left_shift_assign(
streams,
ct.as_mut(),
@@ -116,22 +137,27 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
is_signed,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_left_shift_assign(
streams,
ct.as_mut(),
@@ -140,18 +166,11 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
is_signed,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -433,34 +452,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -481,41 +488,53 @@ impl CudaServerKey {
};
let shift_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_left_shift_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
T::IS_SIGNED,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_left_shift_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
lwe_ciphertext_count.0 as u32,
T::IS_SIGNED,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_left_shift_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
T::IS_SIGNED,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -545,34 +564,22 @@ impl CudaServerKey {
CudaBootstrappingKey::Classic(d_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
cuda_backend_get_full_propagate_assign_size_on_gpu(
streams,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}
@@ -593,41 +600,53 @@ impl CudaServerKey {
};
let shift_mem = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => cuda_backend_get_right_shift_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
T::IS_SIGNED,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
),
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_right_shift_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
lwe_ciphertext_count.0 as u32,
T::IS_SIGNED,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_right_shift_size_on_gpu(
streams,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
lwe_ciphertext_count.0 as u32,
T::IS_SIGNED,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
)
}

View File

@@ -9,10 +9,9 @@ use crate::integer::gpu::server_key::{CudaDynamicKeyswitchingKey, CudaServerKey}
use crate::integer::gpu::server_key::CudaBootstrappingKey;
use crate::integer::gpu::{
cuda_backend_sub_and_propagate_single_carry_assign,
cuda_backend_unchecked_unsigned_overflowing_sub_assign, PBSType,
cuda_backend_unchecked_unsigned_overflowing_sub_assign,
};
use crate::integer::server_key::radix_parallel::OutputFlag;
use crate::shortint::parameters::LweBskGroupingFactor;
impl CudaServerKey {
/// Computes homomorphically a subtraction between two ciphertexts encrypting integer values.
@@ -311,17 +310,11 @@ impl CudaServerKey {
in_carry_dvec,
&d_bsk.d_vec,
&computing_ks_key.d_vec,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
ciphertext.info.blocks.first().unwrap().message_modulus,
ciphertext.info.blocks.first().unwrap().carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
compute_overflow,
uses_input_borrow,
d_bsk.ms_noise_reduction_configuration.as_ref(),
@@ -336,17 +329,11 @@ impl CudaServerKey {
in_carry_dvec,
&d_multibit_bsk.d_vec,
&computing_ks_key.d_vec,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
ciphertext.info.blocks.first().unwrap().message_modulus,
ciphertext.info.blocks.first().unwrap().carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
compute_overflow,
uses_input_borrow,
None,
@@ -392,18 +379,12 @@ impl CudaServerKey {
in_carry,
&d_bsk.d_vec,
&computing_ks_key.d_vec,
d_bsk.input_lwe_dimension(),
d_bsk.glwe_dimension(),
d_bsk.polynomial_size(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count(),
d_bsk.decomp_base_log(),
num_blocks,
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
LweBskGroupingFactor(0),
requested_flag,
uses_carry,
d_bsk.ms_noise_reduction_configuration.as_ref(),
@@ -418,18 +399,12 @@ impl CudaServerKey {
in_carry,
&d_multibit_bsk.d_vec,
&computing_ks_key.d_vec,
d_multibit_bsk.input_lwe_dimension(),
d_multibit_bsk.glwe_dimension(),
d_multibit_bsk.polynomial_size(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count(),
d_multibit_bsk.decomp_base_log(),
num_blocks,
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
requested_flag,
uses_carry,
None,

View File

@@ -1,10 +1,9 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext};
use crate::integer::gpu::cuda_backend_trivium_generate_keystream;
use crate::integer::gpu::server_key::{
CudaBootstrappingKey, CudaDynamicKeyswitchingKey, CudaServerKey,
};
use crate::integer::gpu::{cuda_backend_trivium_generate_keystream, PBSType};
use crate::shortint::parameters::LweBskGroupingFactor;
impl CudaServerKey {
/// Generates a Trivium keystream homomorphically on the GPU.
@@ -70,15 +69,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
d_bsk.input_lwe_dimension,
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
num_steps as u32,
);
@@ -93,15 +86,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
d_multibit_bsk.input_lwe_dimension,
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
PBSType::MultiBit,
None,
num_steps as u32,
);

View File

@@ -1,12 +1,11 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::LweBskGroupingFactor;
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext};
use crate::integer::gpu::server_key::{
CudaBootstrappingKey, CudaDynamicKeyswitchingKey, CudaServerKey,
};
use crate::integer::gpu::{
cuda_backend_unchecked_all_eq_slices, cuda_backend_unchecked_contains_sub_slice, PBSType,
cuda_backend_unchecked_all_eq_slices, cuda_backend_unchecked_contains_sub_slice,
};
impl CudaServerKey {
@@ -65,6 +64,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_all_eq_slices(
streams,
&mut match_ct,
@@ -74,20 +85,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_all_eq_slices(
streams,
&mut match_ct,
@@ -97,16 +113,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -274,6 +283,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_contains_sub_slice(
streams,
&mut match_ct,
@@ -283,20 +304,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_contains_sub_slice(
streams,
&mut match_ct,
@@ -306,16 +332,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}

View File

@@ -1,5 +1,5 @@
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{LweBskGroupingFactor, UnsignedInteger};
use crate::core_crypto::prelude::UnsignedInteger;
use crate::integer::block_decomposition::DecomposableInto;
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext};
@@ -13,7 +13,7 @@ use crate::integer::gpu::{
cuda_backend_unchecked_first_index_of, cuda_backend_unchecked_first_index_of_clear,
cuda_backend_unchecked_index_in_clears, cuda_backend_unchecked_index_of,
cuda_backend_unchecked_index_of_clear, cuda_backend_unchecked_is_in_clears,
cuda_backend_unchecked_match_value, cuda_backend_unchecked_match_value_or, PBSType,
cuda_backend_unchecked_match_value, cuda_backend_unchecked_match_value_or,
};
pub use crate::integer::server_key::radix_parallel::MatchValues;
use crate::prelude::CastInto;
@@ -61,6 +61,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_match_value(
streams,
&mut result_ct,
@@ -71,20 +83,25 @@ impl CudaServerKey {
self.carry_modulus,
&d_bsk.d_vec,
&computing_ks_key.d_vec,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_match_value(
streams,
&mut result_ct,
@@ -95,16 +112,9 @@ impl CudaServerKey {
self.carry_modulus,
&d_multibit_bsk.d_vec,
&computing_ks_key.d_vec,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -134,42 +144,52 @@ impl CudaServerKey {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_unchecked_match_value_size_on_gpu(
streams,
ct.as_ref(),
matches,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_unchecked_match_value_size_on_gpu(
streams,
ct.as_ref(),
matches,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
None,
)
}
@@ -297,6 +317,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_match_value_or(
streams,
&mut result,
@@ -307,20 +339,25 @@ impl CudaServerKey {
self.carry_modulus,
&d_bsk.d_vec,
&computing_ks_key.d_vec,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_match_value_or(
streams,
&mut result,
@@ -331,16 +368,9 @@ impl CudaServerKey {
self.carry_modulus,
&d_multibit_bsk.d_vec,
&computing_ks_key.d_vec,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -371,44 +401,54 @@ impl CudaServerKey {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_unchecked_match_value_or_size_on_gpu(
streams,
ct.as_ref(),
matches,
or_value,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
LweBskGroupingFactor(0),
self.message_modulus,
self.carry_modulus,
PBSType::Classical,
d_bsk.ms_noise_reduction_configuration.as_ref(),
)
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_get_unchecked_match_value_or_size_on_gpu(
streams,
ct.as_ref(),
matches,
or_value,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
d_multibit_bsk.grouping_factor,
self.message_modulus,
self.carry_modulus,
PBSType::MultiBit,
None,
)
}
@@ -512,6 +552,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_contains(
streams,
&mut result,
@@ -521,20 +573,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_contains(
streams,
&mut result,
@@ -544,16 +601,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -663,6 +713,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_contains_clear(
streams,
&mut result,
@@ -672,20 +734,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_contains_clear(
streams,
&mut result,
@@ -695,16 +762,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -805,6 +865,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_is_in_clears(
streams,
&mut boolean_res,
@@ -814,20 +886,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_is_in_clears(
streams,
&mut boolean_res,
@@ -837,16 +914,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -960,6 +1030,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_index_in_clears(
streams,
index_ct.as_mut(),
@@ -970,20 +1052,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_index_in_clears(
streams,
index_ct.as_mut(),
@@ -994,16 +1081,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -1127,6 +1207,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_first_index_in_clears(
streams,
index_ct.as_mut(),
@@ -1137,20 +1229,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_first_index_in_clears(
streams,
index_ct.as_mut(),
@@ -1161,16 +1258,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -1281,6 +1371,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_index_of(
streams,
index_ct.as_mut(),
@@ -1291,20 +1393,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_index_of(
streams,
index_ct.as_mut(),
@@ -1315,16 +1422,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -1464,6 +1564,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_index_of_clear(
streams,
index_ct.as_mut(),
@@ -1474,20 +1586,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_index_of_clear(
streams,
index_ct.as_mut(),
@@ -1498,16 +1615,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -1636,6 +1746,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_first_index_of_clear(
streams,
index_ct.as_mut(),
@@ -1646,20 +1768,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_first_index_of_clear(
streams,
index_ct.as_mut(),
@@ -1670,16 +1797,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}
@@ -1806,6 +1926,18 @@ impl CudaServerKey {
unsafe {
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_first_index_of(
streams,
index_ct.as_mut(),
@@ -1816,20 +1948,25 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_bsk.glwe_dimension,
d_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_bsk.decomp_level_count,
d_bsk.decomp_base_log,
PBSType::Classical,
LweBskGroupingFactor(0),
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
assert_eq!(
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk.input_lwe_dimension,
"KS key output LWE dimension mismatch with BSK input LWE dimension"
);
assert_eq!(
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk
.glwe_dimension
.to_equivalent_lwe_dimension(d_multibit_bsk.polynomial_size),
"KS key input LWE dimension mismatch with BSK big LWE dimension"
);
cuda_backend_unchecked_first_index_of(
streams,
index_ct.as_mut(),
@@ -1840,16 +1977,9 @@ impl CudaServerKey {
&computing_ks_key.d_vec,
self.message_modulus,
self.carry_modulus,
d_multibit_bsk.glwe_dimension,
d_multibit_bsk.polynomial_size,
computing_ks_key.input_key_lwe_size().to_lwe_dimension(),
computing_ks_key.output_key_lwe_size().to_lwe_dimension(),
d_multibit_bsk,
computing_ks_key.decomposition_level_count(),
computing_ks_key.decomposition_base_log(),
d_multibit_bsk.decomp_level_count,
d_multibit_bsk.decomp_base_log,
PBSType::MultiBit,
d_multibit_bsk.grouping_factor,
None,
);
}

View File

@@ -1477,4 +1477,67 @@ pub(crate) mod test {
}
}
}
/// Runs the uniformity check on OPRF-generated random bits for three parameter sets
/// (one multi-bit, one default KS-PBS, one KS32) and for requested bit counts of 3 and 4.
///
/// For every seed handed over by `test_uniformity`, the requested number of random bits is
/// generated, each returned block is decrypted (message and carry), and the blocks are packed
/// into a single `u64`, least-significant block first. The value is checked against a modulus
/// of `1 << random_bits_count`.
///
/// NOTE(review): the `_ci_run_filter` suffix presumably selects this test in CI — confirm
/// against the CI test filter configuration.
#[test]
fn oprf_test_uniformity_bits_ci_run_filter() {
    use crate::shortint::gen_keys;
    use crate::shortint::parameters::test_params::{
        TEST_PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128,
        TEST_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
    };
    use crate::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;

    let sample_count: usize = 100_000;
    let p_value_limit: f64 = 0.000_01;

    let parameter_sets = [
        ShortintParameterSet::from(
            TEST_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
        ),
        ShortintParameterSet::from(PARAM_MESSAGE_2_CARRY_2_KS_PBS),
        ShortintParameterSet::from(TEST_PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128),
    ];

    for params in parameter_sets {
        let (ck, sk) = gen_keys(params);
        let oprf_ck = OprfPrivateKey::new(&ck);
        let oprf_sk = OprfServerKey::new(&oprf_ck, &ck).unwrap();

        // Number of random bits a single block can carry: log2 of the message modulus.
        let random_bits_per_block = u64::from(sk.message_modulus.0.ilog2());

        for random_bits_count in [3u64, 4] {
            let expected_num_blocks =
                random_bits_count.div_ceil(random_bits_per_block) as usize;

            test_uniformity(
                sample_count,
                p_value_limit,
                1 << random_bits_count,
                |seed| {
                    let seed_bytes = (seed as u128).to_le_bytes();
                    let blocks = oprf_sk.generate_oblivious_pseudo_random_bits(
                        seed_bytes.as_slice(),
                        random_bits_count,
                        &sk,
                    );

                    // Pack the decrypted blocks into one integer; each block is shifted by
                    // the total width of the blocks that came before it.
                    let (packed, _next_shift) = blocks.iter().enumerate().fold(
                        (0u64, 0u64),
                        |(packed, shift), (block_index, block)| {
                            let decrypted = ck.decrypt_message_and_carry(block);
                            let block_bits = bits_in_block(
                                block_index,
                                expected_num_blocks,
                                random_bits_count,
                                random_bits_per_block,
                            );
                            (packed | (decrypted << shift), shift + block_bits)
                        },
                    );
                    packed
                },
            );
        }
    }
}
}