Mirror of https://github.com/zama-ai/tfhe-rs.git (synced 2026-01-10 07:08:03 -05:00)

Commit: works again
@@ -2,27 +2,7 @@
#define CUDA_INTEGER_COMPRESSION_H

#include "../../pbs/pbs_enums.h"

typedef struct {
  void *ptr;
  uint32_t num_radix_blocks;
  uint32_t lwe_dimension;
} CudaLweCiphertextListFFI;

typedef struct {
  void *ptr;
  uint32_t storage_log_modulus;
  uint32_t lwe_per_glwe;
  // Input LWEs are grouped into groups of `lwe_per_glwe` (the last group may
  // be smaller). Each group is then packed into one GLWE with `lwe_per_glwe`
  // bodies (one for each LWE of the group). In the end, the total number of
  // bodies is equal to the number of input LWEs.
  uint32_t total_lwe_bodies_count;
  uint32_t glwe_dimension;
  uint32_t polynomial_size;
} CudaPackedGlweCiphertextListFFI;

#include "../integer.h"
extern "C" {
uint64_t scratch_cuda_integer_compress_radix_ciphertext_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,

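The comment on CudaPackedGlweCiphertextListFFI above describes how input LWEs are grouped before packing. The snippet below is a minimal host-side sketch of the implied bookkeeping, using only fields named in the struct; the helper name and the assertion values are illustrative and not part of the header:

#include <cstdint>
#include <cassert>

// Illustrative only: derives the number of GLWEs needed to pack a batch of
// LWEs, following the grouping rule stated in the comment above.
inline uint32_t count_glwes_for_packing(uint32_t total_lwe_bodies_count,
                                        uint32_t lwe_per_glwe) {
  // Ceiling division: every full group of `lwe_per_glwe` LWEs fills one GLWE,
  // and a smaller trailing group (if any) still needs one GLWE of its own.
  return (total_lwe_bodies_count + lwe_per_glwe - 1) / lwe_per_glwe;
}

int main() {
  // Example: 10 LWE bodies packed 4 per GLWE -> 2 full GLWEs + 1 partial = 3.
  assert(count_glwes_for_packing(10, 4) == 3);
  // The total number of bodies across all GLWEs stays equal to the input count.
  return 0;
}
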
@@ -80,6 +80,26 @@ typedef struct {
  bool const divisor_has_more_bits_than_numerator;
} CudaScalarDivisorFFI;

typedef struct {
  void *ptr;
  uint32_t num_radix_blocks;
  uint32_t lwe_dimension;
} CudaLweCiphertextListFFI;

typedef struct {
  void *ptr;
  uint32_t storage_log_modulus;
  uint32_t lwe_per_glwe;
  // Input LWEs are grouped into groups of `lwe_per_glwe` (the last group may
  // be smaller). Each group is then packed into one GLWE with `lwe_per_glwe`
  // bodies (one for each LWE of the group). In the end, the total number of
  // bodies is equal to the number of input LWEs.
  uint32_t total_lwe_bodies_count;
  uint32_t glwe_dimension;
  uint32_t polynomial_size;
} CudaPackedGlweCiphertextListFFI;

uint64_t scratch_cuda_apply_univariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,

@@ -107,9 +107,9 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
      cuda_synchronize_stream(mem_ptr->sub_streams_2[j], gpu_indexes[j]);
    }

    print_body<Torus>("remainder", (Torus *)remainder->ptr,
                      remainder->num_radix_blocks, radix_params.big_lwe_dimension,
                      576460752303423488ULL);
    // print_body<Torus>("remainder", (Torus *)remainder->ptr,
    //                   remainder->num_radix_blocks, radix_params.big_lwe_dimension,
    //                   576460752303423488ULL);

    for (int block_index = num_blocks - 1; block_index >= 0; block_index--) {
      uint32_t slice_len = num_blocks - block_index;

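The 576460752303423488ULL constant passed to these debug print_body calls appears to be 2^59, which matches the plaintext scaling factor (delta) used with 2_2 radix parameters. The check below only illustrates that arithmetic and is not code from this commit:

#include <cstdint>

int main() {
  // With message_modulus = 4 and carry_modulus = 4 (2_2 parameters), the
  // plaintext is scaled into the top bits of a 64-bit torus element.
  const uint64_t message_modulus = 4;
  const uint64_t carry_modulus = 4;
  const uint64_t delta = (1ULL << 63) / (message_modulus * carry_modulus);
  static_assert((1ULL << 59) == 576460752303423488ULL, "constant is 2^59");
  return delta == 576460752303423488ULL ? 0 : 1;
}
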
@@ -128,16 +128,16 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
                                        slice_len, remainder, block_index,
                                        num_blocks);

    if (slice_len == 4) {
      print_body<Torus>("low1", (Torus *)low1->ptr, low1->num_radix_blocks,
                        radix_params.big_lwe_dimension, 576460752303423488ULL);
      print_body<Torus>("low2", (Torus *)low2->ptr, low2->num_radix_blocks,
                        radix_params.big_lwe_dimension, 576460752303423488ULL);
      print_body<Torus>("low3", (Torus *)low3->ptr, low3->num_radix_blocks,
                        radix_params.big_lwe_dimension, 576460752303423488ULL);
      print_body<Torus>("rem", (Torus *)rem->ptr, rem->num_radix_blocks,
                        radix_params.big_lwe_dimension, 576460752303423488ULL);
    }
    // if (slice_len == 4) {
    //   print_body<Torus>("low1", (Torus *)low1->ptr, low1->num_radix_blocks,
    //                     radix_params.big_lwe_dimension, 576460752303423488ULL);
    //   print_body<Torus>("low2", (Torus *)low2->ptr, low2->num_radix_blocks,
    //                     radix_params.big_lwe_dimension, 576460752303423488ULL);
    //   print_body<Torus>("low3", (Torus *)low3->ptr, low3->num_radix_blocks,
    //                     radix_params.big_lwe_dimension, 576460752303423488ULL);
    //   print_body<Torus>("rem", (Torus *)rem->ptr, rem->num_radix_blocks,
    //                     radix_params.big_lwe_dimension, 576460752303423488ULL);
    // }
    uint32_t compute_borrow = 1;
    uint32_t uses_input_borrow = 0;
    auto sub_result_f = [&](cudaStream_t const *streams,

@@ -223,12 +223,12 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
    auto o2 = mem_ptr->sub_2_overflowed;
    auto o3 = mem_ptr->sub_1_overflowed;

    print_body<Torus>("r1", (Torus *)r1->ptr, r1->num_radix_blocks,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    print_body<Torus>("r2", (Torus *)r2->ptr, r2->num_radix_blocks,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    print_body<Torus>("r3", (Torus *)r3->ptr, r3->num_radix_blocks,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("r1", (Torus *)r1->ptr, r1->num_radix_blocks,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("r2", (Torus *)r2->ptr, r2->num_radix_blocks,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("r3", (Torus *)r3->ptr, r3->num_radix_blocks,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);

    // used as a bitor
    host_integer_radix_bitop_kb(mem_ptr->sub_streams_1, gpu_indexes, gpu_count,

@@ -248,19 +248,19 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
      cuda_synchronize_stream(mem_ptr->sub_streams_3[j], gpu_indexes[j]);
    }

    print_body<Torus>("o1", (Torus *)o1->ptr, 1, radix_params.big_lwe_dimension,
                      576460752303423488ULL);
    print_body<Torus>("o2", (Torus *)o2->ptr, 1, radix_params.big_lwe_dimension,
                      576460752303423488ULL);
    print_body<Torus>("o3", (Torus *)o3->ptr, 1, radix_params.big_lwe_dimension,
                      576460752303423488ULL);

    print_body<Torus>("cmp1", (Torus *)mem_ptr->cmp_1->ptr, 1,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    print_body<Torus>("cmp2", (Torus *)mem_ptr->cmp_2->ptr, 1,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    print_body<Torus>("cmp3", (Torus *)mem_ptr->cmp_3->ptr, 1,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("o1", (Torus *)o1->ptr, 1, radix_params.big_lwe_dimension,
    //                   576460752303423488ULL);
    // print_body<Torus>("o2", (Torus *)o2->ptr, 1, radix_params.big_lwe_dimension,
    //                   576460752303423488ULL);
    // print_body<Torus>("o3", (Torus *)o3->ptr, 1, radix_params.big_lwe_dimension,
    //                   576460752303423488ULL);
    //
    // print_body<Torus>("cmp1", (Torus *)mem_ptr->cmp_1->ptr, 1,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("cmp2", (Torus *)mem_ptr->cmp_2->ptr, 1,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("cmp3", (Torus *)mem_ptr->cmp_3->ptr, 1,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);

    // The cx variables tell whether the corresponding result of the subtraction
    // should be kept, and what value the quotient block should have

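The comment above says that the cx variables select which trial-subtraction result is kept and which value the quotient block gets. Below is a plaintext analogue of that selection, purely illustrative: the radix-4 digit framing and all variable names are assumptions inferred from low1/low2/low3 and the o1/o2/o3 overflow flags in this diff, not code from the repository.

#include <cstdint>
#include <cstdio>

int main() {
  // Plaintext analogue (illustrative): one radix-4 digit step of division.
  // `slice` plays the role of the current remainder slice, `d` the divisor.
  uint64_t slice = 11, d = 3;

  // Three trial subtractions, as with r1/r2/r3 above; ok = "did not borrow".
  bool ok1 = slice >= 1 * d, ok2 = slice >= 2 * d, ok3 = slice >= 3 * d;

  // One-hot selectors c0..c3: exactly one is 1, the others are 0.
  int c3 = ok3;
  int c2 = ok2 && !ok3;
  int c1 = ok1 && !ok2;
  int c0 = !ok1;

  // Keep the matching subtraction result and set the quotient digit as a
  // masked sum (mirrors "multiply by a factor, then add" on ciphertexts).
  uint64_t rem = c0 * slice + c1 * (slice - 1 * d) + c2 * (slice - 2 * d) +
                 c3 * (slice - 3 * d);
  int quotient_digit = 0 * c0 + 1 * c1 + 2 * c2 + 3 * c3;

  printf("digit=%d rem=%llu\n", quotient_digit, (unsigned long long)rem);
  return 0;
}
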
@@ -312,24 +312,24 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
    copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
                                             mem_ptr->c0, 0, 1, o1, 0, 1);

    print_body<Torus>("c0", (Torus *)mem_ptr->c0->ptr, 1,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    print_body<Torus>("c1", (Torus *)mem_ptr->c1->ptr, 1,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    print_body<Torus>("c2", (Torus *)mem_ptr->c2->ptr, 1,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    print_body<Torus>("c3", (Torus *)mem_ptr->c3->ptr, 1,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("c0", (Torus *)mem_ptr->c0->ptr, 1,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("c1", (Torus *)mem_ptr->c1->ptr, 1,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("c2", (Torus *)mem_ptr->c2->ptr, 1,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("c3", (Torus *)mem_ptr->c3->ptr, 1,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);

    auto conditional_update =
        [&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
            uint32_t gpu_count, CudaRadixCiphertextFFI *cx,
            CudaRadixCiphertextFFI *rx, int_radix_lut<Torus> *lut,
            uint32_t factor) {
            Torus factor) {
          // printf("rx->num_radix_blocks: %d\n", rx->num_radix_blocks);
          auto rx_list = to_lwe_ciphertext_list(rx);
          host_cleartext_multiplication<Torus>(
              streams[0], gpu_indexes[0], (Torus *)rx->ptr, (Torus *)rx->ptr, factor,
              radix_params.big_lwe_dimension, rx->num_radix_blocks);
              streams[0], gpu_indexes[0], (Torus *)rx->ptr, &rx_list, factor);
          host_add_the_same_block_to_all_blocks<Torus>(streams[0], gpu_indexes[0], rx,
                                                       rx, cx, 4, 4);

@@ -385,21 +385,21 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
      cuda_synchronize_stream(mem_ptr->sub_streams_7[j], gpu_indexes[j]);
    }

    print_body<Torus>("gpu_after_r1", (Torus *)r1->ptr, r1->num_radix_blocks,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    print_body<Torus>("gpu_after_r2", (Torus *)r2->ptr, r2->num_radix_blocks,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    print_body<Torus>("gpu_after_r3", (Torus *)r3->ptr, r3->num_radix_blocks,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    print_body<Torus>("gpu_after_rem", (Torus *)rem->ptr, rem->num_radix_blocks,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);

    print_body<Torus>("gpu_after_q1", (Torus *)mem_ptr->q1->ptr, mem_ptr->q1->num_radix_blocks,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    print_body<Torus>("gpu_after_q2", (Torus *)mem_ptr->q2->ptr, mem_ptr->q2->num_radix_blocks,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    print_body<Torus>("gpu_after_q3", (Torus *)mem_ptr->q3->ptr, mem_ptr->q3->num_radix_blocks,
                      radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("gpu_after_r1", (Torus *)r1->ptr, r1->num_radix_blocks,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("gpu_after_r2", (Torus *)r2->ptr, r2->num_radix_blocks,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("gpu_after_r3", (Torus *)r3->ptr, r3->num_radix_blocks,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("gpu_after_rem", (Torus *)rem->ptr, rem->num_radix_blocks,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    //
    // print_body<Torus>("gpu_after_q1", (Torus *)mem_ptr->q1->ptr, mem_ptr->q1->num_radix_blocks,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("gpu_after_q2", (Torus *)mem_ptr->q2->ptr, mem_ptr->q2->num_radix_blocks,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);
    // print_body<Torus>("gpu_after_q3", (Torus *)mem_ptr->q3->ptr, mem_ptr->q3->num_radix_blocks,
    //                   radix_params.big_lwe_dimension, 576460752303423488ULL);

    host_addition<Torus>(streams[0], gpu_indexes[0], rem, rem,
                         r3, rem->num_radix_blocks, 4, 4);

@@ -431,7 +431,7 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
    rem->num_radix_blocks = remainder->num_radix_blocks;
    copy_radix_ciphertext_slice_async<Torus>(
        streams[0], gpu_indexes[0],
        remainder, block_index, rem->num_radix_blocks,
        remainder, block_index, remainder->num_radix_blocks,
        rem, 0, tmp_rem_size);
    rem->num_radix_blocks = tmp_rem_size;

@@ -7,6 +7,13 @@
#include "utils/helper_profile.cuh"
#include "utils/kernel_dimensions.cuh"

inline CudaLweCiphertextListFFI to_lwe_ciphertext_list(CudaRadixCiphertextFFI* radix) {
  return {
      .ptr = radix->ptr,
      .num_radix_blocks = radix->num_radix_blocks,
      .lwe_dimension = radix->lwe_dimension
  };
}
template <typename Torus>
void create_zero_radix_ciphertext_async(cudaStream_t const stream,
                                        uint32_t const gpu_index,

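The new to_lwe_ciphertext_list helper above builds a CudaLweCiphertextListFFI that aliases the radix ciphertext's device pointer rather than copying it, which is why the conditional_update call site earlier in this diff can pass &rx_list while still writing through rx->ptr. Below is a self-contained sketch of that view semantics, using simplified stand-in structs that only reproduce the fields the helper touches (the real definitions live in the headers shown earlier):

#include <cstdint>
#include <cassert>

// Simplified stand-ins for the FFI structs from the diff above.
struct CudaRadixCiphertextFFI {
  void *ptr;
  uint32_t num_radix_blocks;
  uint32_t lwe_dimension;
};
struct CudaLweCiphertextListFFI {
  void *ptr;
  uint32_t num_radix_blocks;
  uint32_t lwe_dimension;
};

// Same shape as the helper in the diff: a non-owning view over the same buffer.
inline CudaLweCiphertextListFFI to_lwe_ciphertext_list(CudaRadixCiphertextFFI *radix) {
  return {radix->ptr, radix->num_radix_blocks, radix->lwe_dimension};
}

int main() {
  uint64_t body_storage[8] = {};
  CudaRadixCiphertextFFI rx{body_storage, 2, 3};
  CudaLweCiphertextListFFI view = to_lwe_ciphertext_list(&rx);
  // The view shares the pointer: writes through rx remain visible via the view.
  assert(view.ptr == rx.ptr && view.num_radix_blocks == 2);
  return 0;
}
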
@@ -105,134 +105,6 @@ const _: () = {
        ms_input_variance
    ) - 32usize];
};
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CudaLweCiphertextListFFI {
    pub ptr: *mut ffi::c_void,
    pub num_radix_blocks: u32,
    pub lwe_dimension: u32,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of CudaLweCiphertextListFFI"]
        [::std::mem::size_of::<CudaLweCiphertextListFFI>() - 16usize];
    ["Alignment of CudaLweCiphertextListFFI"]
        [::std::mem::align_of::<CudaLweCiphertextListFFI>() - 8usize];
    ["Offset of field: CudaLweCiphertextListFFI::ptr"]
        [::std::mem::offset_of!(CudaLweCiphertextListFFI, ptr) - 0usize];
    ["Offset of field: CudaLweCiphertextListFFI::num_radix_blocks"]
        [::std::mem::offset_of!(CudaLweCiphertextListFFI, num_radix_blocks) - 8usize];
    ["Offset of field: CudaLweCiphertextListFFI::lwe_dimension"]
        [::std::mem::offset_of!(CudaLweCiphertextListFFI, lwe_dimension) - 12usize];
};
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CudaPackedGlweCiphertextListFFI {
    pub ptr: *mut ffi::c_void,
    pub storage_log_modulus: u32,
    pub lwe_per_glwe: u32,
    pub total_lwe_bodies_count: u32,
    pub glwe_dimension: u32,
    pub polynomial_size: u32,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of CudaPackedGlweCiphertextListFFI"]
        [::std::mem::size_of::<CudaPackedGlweCiphertextListFFI>() - 32usize];
    ["Alignment of CudaPackedGlweCiphertextListFFI"]
        [::std::mem::align_of::<CudaPackedGlweCiphertextListFFI>() - 8usize];
    ["Offset of field: CudaPackedGlweCiphertextListFFI::ptr"]
        [::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, ptr) - 0usize];
    ["Offset of field: CudaPackedGlweCiphertextListFFI::storage_log_modulus"]
        [::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, storage_log_modulus) - 8usize];
    ["Offset of field: CudaPackedGlweCiphertextListFFI::lwe_per_glwe"]
        [::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, lwe_per_glwe) - 12usize];
    ["Offset of field: CudaPackedGlweCiphertextListFFI::total_lwe_bodies_count"]
        [::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, total_lwe_bodies_count) - 16usize];
    ["Offset of field: CudaPackedGlweCiphertextListFFI::glwe_dimension"]
        [::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, glwe_dimension) - 20usize];
    ["Offset of field: CudaPackedGlweCiphertextListFFI::polynomial_size"]
        [::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, polynomial_size) - 24usize];
};
unsafe extern "C" {
    pub fn scratch_cuda_integer_compress_radix_ciphertext_64(
        streams: *const *mut ffi::c_void,
        gpu_indexes: *const u32,
        gpu_count: u32,
        mem_ptr: *mut *mut i8,
        compression_glwe_dimension: u32,
        compression_polynomial_size: u32,
        lwe_dimension: u32,
        ks_level: u32,
        ks_base_log: u32,
        num_radix_blocks: u32,
        message_modulus: u32,
        carry_modulus: u32,
        pbs_type: PBS_TYPE,
        lwe_per_glwe: u32,
        allocate_gpu_memory: bool,
    ) -> u64;
}
unsafe extern "C" {
    pub fn scratch_cuda_integer_decompress_radix_ciphertext_64(
        streams: *const *mut ffi::c_void,
        gpu_indexes: *const u32,
        gpu_count: u32,
        mem_ptr: *mut *mut i8,
        encryption_glwe_dimension: u32,
        encryption_polynomial_size: u32,
        compression_glwe_dimension: u32,
        compression_polynomial_size: u32,
        lwe_dimension: u32,
        pbs_level: u32,
        pbs_base_log: u32,
        num_blocks_to_decompress: u32,
        message_modulus: u32,
        carry_modulus: u32,
        pbs_type: PBS_TYPE,
        allocate_gpu_memory: bool,
        allocate_ms_array: bool,
    ) -> u64;
}
unsafe extern "C" {
    pub fn cuda_integer_compress_radix_ciphertext_64(
        streams: *const *mut ffi::c_void,
        gpu_indexes: *const u32,
        gpu_count: u32,
        glwe_array_out: *mut CudaPackedGlweCiphertextListFFI,
        lwe_array_in: *const CudaLweCiphertextListFFI,
        fp_ksk: *const *mut ffi::c_void,
        mem_ptr: *mut i8,
    );
}
unsafe extern "C" {
    pub fn cuda_integer_decompress_radix_ciphertext_64(
        streams: *const *mut ffi::c_void,
        gpu_indexes: *const u32,
        gpu_count: u32,
        lwe_array_out: *mut CudaLweCiphertextListFFI,
        glwe_in: *const CudaPackedGlweCiphertextListFFI,
        indexes_array: *const u32,
        bsks: *const *mut ffi::c_void,
        mem_ptr: *mut i8,
    );
}
unsafe extern "C" {
    pub fn cleanup_cuda_integer_compress_radix_ciphertext_64(
        streams: *const *mut ffi::c_void,
        gpu_indexes: *const u32,
        gpu_count: u32,
        mem_ptr_void: *mut *mut i8,
    );
}
unsafe extern "C" {
    pub fn cleanup_cuda_integer_decompress_radix_ciphertext_64(
        streams: *const *mut ffi::c_void,
        gpu_indexes: *const u32,
        gpu_count: u32,
        mem_ptr_void: *mut *mut i8,
    );
}
pub const SHIFT_OR_ROTATE_TYPE_LEFT_SHIFT: SHIFT_OR_ROTATE_TYPE = 0;
pub const SHIFT_OR_ROTATE_TYPE_RIGHT_SHIFT: SHIFT_OR_ROTATE_TYPE = 1;
pub const SHIFT_OR_ROTATE_TYPE_LEFT_ROTATE: SHIFT_OR_ROTATE_TYPE = 2;

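The const _ blocks in the bindings above are bindgen-generated layout assertions: each indexing expression only compiles if the struct's size, alignment, or field offset matches the C layout. Below is a C++ analogue of the same idea using static_assert and offsetof; it is purely illustrative and assumes a 64-bit target, the same assumption the generated assertions encode.

#include <cstddef>
#include <cstdint>

// Mirror of the repr(C) struct declared in the bindings above.
struct CudaLweCiphertextListFFI {
  void *ptr;
  uint32_t num_radix_blocks;
  uint32_t lwe_dimension;
};

// Compile-time layout checks, the C++ counterpart of bindgen's
// ["Size of ..."][size_of::<T>() - 16usize] pattern: if the layout drifts,
// the build fails instead of silently corrupting FFI calls.
static_assert(sizeof(CudaLweCiphertextListFFI) == 16, "size");
static_assert(alignof(CudaLweCiphertextListFFI) == 8, "alignment");
static_assert(offsetof(CudaLweCiphertextListFFI, ptr) == 0, "ptr offset");
static_assert(offsetof(CudaLweCiphertextListFFI, num_radix_blocks) == 8, "offset");
static_assert(offsetof(CudaLweCiphertextListFFI, lwe_dimension) == 12, "offset");

int main() { return 0; }
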
@@ -367,6 +239,55 @@ const _: () = {
        divisor_has_more_bits_than_numerator
    ) - 60usize];
};
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CudaLweCiphertextListFFI {
    pub ptr: *mut ffi::c_void,
    pub num_radix_blocks: u32,
    pub lwe_dimension: u32,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of CudaLweCiphertextListFFI"]
        [::std::mem::size_of::<CudaLweCiphertextListFFI>() - 16usize];
    ["Alignment of CudaLweCiphertextListFFI"]
        [::std::mem::align_of::<CudaLweCiphertextListFFI>() - 8usize];
    ["Offset of field: CudaLweCiphertextListFFI::ptr"]
        [::std::mem::offset_of!(CudaLweCiphertextListFFI, ptr) - 0usize];
    ["Offset of field: CudaLweCiphertextListFFI::num_radix_blocks"]
        [::std::mem::offset_of!(CudaLweCiphertextListFFI, num_radix_blocks) - 8usize];
    ["Offset of field: CudaLweCiphertextListFFI::lwe_dimension"]
        [::std::mem::offset_of!(CudaLweCiphertextListFFI, lwe_dimension) - 12usize];
};
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CudaPackedGlweCiphertextListFFI {
    pub ptr: *mut ffi::c_void,
    pub storage_log_modulus: u32,
    pub lwe_per_glwe: u32,
    pub total_lwe_bodies_count: u32,
    pub glwe_dimension: u32,
    pub polynomial_size: u32,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of CudaPackedGlweCiphertextListFFI"]
        [::std::mem::size_of::<CudaPackedGlweCiphertextListFFI>() - 32usize];
    ["Alignment of CudaPackedGlweCiphertextListFFI"]
        [::std::mem::align_of::<CudaPackedGlweCiphertextListFFI>() - 8usize];
    ["Offset of field: CudaPackedGlweCiphertextListFFI::ptr"]
        [::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, ptr) - 0usize];
    ["Offset of field: CudaPackedGlweCiphertextListFFI::storage_log_modulus"]
        [::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, storage_log_modulus) - 8usize];
    ["Offset of field: CudaPackedGlweCiphertextListFFI::lwe_per_glwe"]
        [::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, lwe_per_glwe) - 12usize];
    ["Offset of field: CudaPackedGlweCiphertextListFFI::total_lwe_bodies_count"]
        [::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, total_lwe_bodies_count) - 16usize];
    ["Offset of field: CudaPackedGlweCiphertextListFFI::glwe_dimension"]
        [::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, glwe_dimension) - 20usize];
    ["Offset of field: CudaPackedGlweCiphertextListFFI::polynomial_size"]
        [::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, polynomial_size) - 24usize];
};
unsafe extern "C" {
    pub fn scratch_cuda_apply_univariate_lut_kb_64(
        streams: *const *mut ffi::c_void,

@@ -1934,6 +1855,85 @@ unsafe extern "C" {
        mem_ptr_void: *mut *mut i8,
    );
}
unsafe extern "C" {
    pub fn scratch_cuda_integer_compress_radix_ciphertext_64(
        streams: *const *mut ffi::c_void,
        gpu_indexes: *const u32,
        gpu_count: u32,
        mem_ptr: *mut *mut i8,
        compression_glwe_dimension: u32,
        compression_polynomial_size: u32,
        lwe_dimension: u32,
        ks_level: u32,
        ks_base_log: u32,
        num_radix_blocks: u32,
        message_modulus: u32,
        carry_modulus: u32,
        pbs_type: PBS_TYPE,
        lwe_per_glwe: u32,
        allocate_gpu_memory: bool,
    ) -> u64;
}
unsafe extern "C" {
    pub fn scratch_cuda_integer_decompress_radix_ciphertext_64(
        streams: *const *mut ffi::c_void,
        gpu_indexes: *const u32,
        gpu_count: u32,
        mem_ptr: *mut *mut i8,
        encryption_glwe_dimension: u32,
        encryption_polynomial_size: u32,
        compression_glwe_dimension: u32,
        compression_polynomial_size: u32,
        lwe_dimension: u32,
        pbs_level: u32,
        pbs_base_log: u32,
        num_blocks_to_decompress: u32,
        message_modulus: u32,
        carry_modulus: u32,
        pbs_type: PBS_TYPE,
        allocate_gpu_memory: bool,
        allocate_ms_array: bool,
    ) -> u64;
}
unsafe extern "C" {
    pub fn cuda_integer_compress_radix_ciphertext_64(
        streams: *const *mut ffi::c_void,
        gpu_indexes: *const u32,
        gpu_count: u32,
        glwe_array_out: *mut CudaPackedGlweCiphertextListFFI,
        lwe_array_in: *const CudaLweCiphertextListFFI,
        fp_ksk: *const *mut ffi::c_void,
        mem_ptr: *mut i8,
    );
}
unsafe extern "C" {
    pub fn cuda_integer_decompress_radix_ciphertext_64(
        streams: *const *mut ffi::c_void,
        gpu_indexes: *const u32,
        gpu_count: u32,
        lwe_array_out: *mut CudaLweCiphertextListFFI,
        glwe_in: *const CudaPackedGlweCiphertextListFFI,
        indexes_array: *const u32,
        bsks: *const *mut ffi::c_void,
        mem_ptr: *mut i8,
    );
}
unsafe extern "C" {
    pub fn cleanup_cuda_integer_compress_radix_ciphertext_64(
        streams: *const *mut ffi::c_void,
        gpu_indexes: *const u32,
        gpu_count: u32,
        mem_ptr_void: *mut *mut i8,
    );
}
unsafe extern "C" {
    pub fn cleanup_cuda_integer_decompress_radix_ciphertext_64(
        streams: *const *mut ffi::c_void,
        gpu_indexes: *const u32,
        gpu_count: u32,
        mem_ptr_void: *mut *mut i8,
    );
}
pub const KS_TYPE_BIG_TO_SMALL: KS_TYPE = 0;
pub const KS_TYPE_SMALL_TO_BIG: KS_TYPE = 1;
pub type KS_TYPE = ffi::c_uint;