works again

This commit is contained in:
Beka Barbakadze
2025-08-28 14:58:51 +04:00
parent 1ef18c81f5
commit e87f35ba4f
5 changed files with 215 additions and 208 deletions

View File

@@ -2,27 +2,7 @@
#define CUDA_INTEGER_COMPRESSION_H
#include "../../pbs/pbs_enums.h"
// FFI descriptor for a list of LWE ciphertexts: an untyped data pointer plus
// two 32-bit sizes. The struct crosses the C ABI boundary, so field order and
// field types must stay exactly as they are.
typedef struct {
// Untyped pointer to the ciphertext data. The memory space is not visible
// from this header -- presumably device memory; TODO confirm.
void *ptr;
// Block count of the list, as named.
uint32_t num_radix_blocks;
// LWE dimension associated with the ciphertexts in the list, as named.
uint32_t lwe_dimension;
} CudaLweCiphertextListFFI;
// FFI descriptor for a list of packed GLWE ciphertexts. The struct crosses the
// C ABI boundary, so field order and field types must stay exactly as they
// are.
typedef struct {
// Untyped pointer to the packed data. The memory space is not visible from
// this header -- presumably device memory; TODO confirm.
void *ptr;
// NOTE(review): presumably log2 of the modulus the packed data is stored
// under -- confirm against the compression code.
uint32_t storage_log_modulus;
// Number of LWE bodies packed into one GLWE (see the grouping note below).
uint32_t lwe_per_glwe;
// Input LWEs are split into groups of `lwe_per_glwe` (the last group may be
// smaller).
// Each group is then packed into one GLWE with `lwe_per_glwe` bodies (one for
// each LWE of the group). In the end the total number of bodies is equal to
// the number of input LWEs.
uint32_t total_lwe_bodies_count;
// GLWE dimension of the packed ciphertexts, as named.
uint32_t glwe_dimension;
// Polynomial size of the packed ciphertexts, as named.
uint32_t polynomial_size;
} CudaPackedGlweCiphertextListFFI;
#include "../integer.h"
extern "C" {
uint64_t scratch_cuda_integer_compress_radix_ciphertext_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,

View File

@@ -80,6 +80,26 @@ typedef struct {
bool const divisor_has_more_bits_than_numerator;
} CudaScalarDivisorFFI;
// FFI descriptor for a list of LWE ciphertexts: an untyped data pointer plus
// two 32-bit sizes. The struct crosses the C ABI boundary, so field order and
// field types must stay exactly as they are.
typedef struct {
// Untyped pointer to the ciphertext data. The memory space is not visible
// from this header -- presumably device memory; TODO confirm.
void *ptr;
// Block count of the list, as named.
uint32_t num_radix_blocks;
// LWE dimension associated with the ciphertexts in the list, as named.
uint32_t lwe_dimension;
} CudaLweCiphertextListFFI;
// FFI descriptor for a list of packed GLWE ciphertexts. The struct crosses the
// C ABI boundary, so field order and field types must stay exactly as they
// are.
typedef struct {
// Untyped pointer to the packed data. The memory space is not visible from
// this header -- presumably device memory; TODO confirm.
void *ptr;
// NOTE(review): presumably log2 of the modulus the packed data is stored
// under -- confirm against the compression code.
uint32_t storage_log_modulus;
// Number of LWE bodies packed into one GLWE (see the grouping note below).
uint32_t lwe_per_glwe;
// Input LWEs are split into groups of `lwe_per_glwe` (the last group may be
// smaller).
// Each group is then packed into one GLWE with `lwe_per_glwe` bodies (one for
// each LWE of the group). In the end the total number of bodies is equal to
// the number of input LWEs.
uint32_t total_lwe_bodies_count;
// GLWE dimension of the packed ciphertexts, as named.
uint32_t glwe_dimension;
// Polynomial size of the packed ciphertexts, as named.
uint32_t polynomial_size;
} CudaPackedGlweCiphertextListFFI;
uint64_t scratch_cuda_apply_univariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,

View File

@@ -107,9 +107,9 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
cuda_synchronize_stream(mem_ptr->sub_streams_2[j], gpu_indexes[j]);
}
print_body<Torus>("remainder", (Torus *)remainder->ptr,
remainder->num_radix_blocks, radix_params.big_lwe_dimension,
576460752303423488ULL);
// print_body<Torus>("remainder", (Torus *)remainder->ptr,
// remainder->num_radix_blocks, radix_params.big_lwe_dimension,
// 576460752303423488ULL);
for (int block_index = num_blocks - 1; block_index >= 0; block_index--) {
uint32_t slice_len = num_blocks - block_index;
@@ -128,16 +128,16 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
slice_len, remainder, block_index,
num_blocks);
if (slice_len == 4) {
print_body<Torus>("low1", (Torus *)low1->ptr, low1->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("low2", (Torus *)low2->ptr, low2->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("low3", (Torus *)low3->ptr, low3->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("rem", (Torus *)rem->ptr, rem->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
}
// if (slice_len == 4) {
// print_body<Torus>("low1", (Torus *)low1->ptr, low1->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("low2", (Torus *)low2->ptr, low2->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("low3", (Torus *)low3->ptr, low3->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("rem", (Torus *)rem->ptr, rem->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// }
uint32_t compute_borrow = 1;
uint32_t uses_input_borrow = 0;
auto sub_result_f = [&](cudaStream_t const *streams,
@@ -223,12 +223,12 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
auto o2 = mem_ptr->sub_2_overflowed;
auto o3 = mem_ptr->sub_1_overflowed;
print_body<Torus>("r1", (Torus *)r1->ptr, r1->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("r2", (Torus *)r2->ptr, r2->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("r3", (Torus *)r3->ptr, r3->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("r1", (Torus *)r1->ptr, r1->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("r2", (Torus *)r2->ptr, r2->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("r3", (Torus *)r3->ptr, r3->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// used as a bitor
host_integer_radix_bitop_kb(mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
@@ -248,19 +248,19 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
cuda_synchronize_stream(mem_ptr->sub_streams_3[j], gpu_indexes[j]);
}
print_body<Torus>("o1", (Torus *)o1->ptr, 1, radix_params.big_lwe_dimension,
576460752303423488ULL);
print_body<Torus>("o2", (Torus *)o2->ptr, 1, radix_params.big_lwe_dimension,
576460752303423488ULL);
print_body<Torus>("o3", (Torus *)o3->ptr, 1, radix_params.big_lwe_dimension,
576460752303423488ULL);
print_body<Torus>("cmp1", (Torus *)mem_ptr->cmp_1->ptr, 1,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("cmp2", (Torus *)mem_ptr->cmp_2->ptr, 1,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("cmp3", (Torus *)mem_ptr->cmp_3->ptr, 1,
radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("o1", (Torus *)o1->ptr, 1, radix_params.big_lwe_dimension,
// 576460752303423488ULL);
// print_body<Torus>("o2", (Torus *)o2->ptr, 1, radix_params.big_lwe_dimension,
// 576460752303423488ULL);
// print_body<Torus>("o3", (Torus *)o3->ptr, 1, radix_params.big_lwe_dimension,
// 576460752303423488ULL);
//
// print_body<Torus>("cmp1", (Torus *)mem_ptr->cmp_1->ptr, 1,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("cmp2", (Torus *)mem_ptr->cmp_2->ptr, 1,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("cmp3", (Torus *)mem_ptr->cmp_3->ptr, 1,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// The cx variables tell whether the corresponding result of the subtraction
// should be kept, and what value the quotient block should have
@@ -312,24 +312,24 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
mem_ptr->c0, 0, 1, o1, 0, 1);
print_body<Torus>("c0", (Torus *)mem_ptr->c0->ptr, 1,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("c1", (Torus *)mem_ptr->c1->ptr, 1,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("c2", (Torus *)mem_ptr->c2->ptr, 1,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("c3", (Torus *)mem_ptr->c3->ptr, 1,
radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("c0", (Torus *)mem_ptr->c0->ptr, 1,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("c1", (Torus *)mem_ptr->c1->ptr, 1,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("c2", (Torus *)mem_ptr->c2->ptr, 1,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("c3", (Torus *)mem_ptr->c3->ptr, 1,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
auto conditional_update =
[&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, CudaRadixCiphertextFFI *cx,
CudaRadixCiphertextFFI *rx, int_radix_lut<Torus> *lut,
uint32_t factor) {
Torus factor) {
// printf("rx->num_radix_blocks: %d\n", rx->num_radix_blocks);
auto rx_list = to_lwe_ciphertext_list(rx);
host_cleartext_multiplication<Torus>(
streams[0], gpu_indexes[0], (Torus *)rx->ptr, (Torus *)rx->ptr, factor,
radix_params.big_lwe_dimension, rx->num_radix_blocks);
streams[0], gpu_indexes[0], (Torus *)rx->ptr, &rx_list, factor);
host_add_the_same_block_to_all_blocks<Torus>(streams[0], gpu_indexes[0], rx,
rx, cx, 4, 4);
@@ -385,21 +385,21 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
cuda_synchronize_stream(mem_ptr->sub_streams_7[j], gpu_indexes[j]);
}
print_body<Torus>("gpu_after_r1", (Torus *)r1->ptr, r1->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("gpu_after_r2", (Torus *)r2->ptr, r2->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("gpu_after_r3", (Torus *)r3->ptr, r3->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("gpu_after_rem", (Torus *)rem->ptr, rem->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("gpu_after_q1", (Torus *)mem_ptr->q1->ptr, mem_ptr->q1->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("gpu_after_q2", (Torus *)mem_ptr->q2->ptr, mem_ptr->q2->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
print_body<Torus>("gpu_after_q3", (Torus *)mem_ptr->q3->ptr, mem_ptr->q3->num_radix_blocks,
radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("gpu_after_r1", (Torus *)r1->ptr, r1->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("gpu_after_r2", (Torus *)r2->ptr, r2->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("gpu_after_r3", (Torus *)r3->ptr, r3->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("gpu_after_rem", (Torus *)rem->ptr, rem->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
//
// print_body<Torus>("gpu_after_q1", (Torus *)mem_ptr->q1->ptr, mem_ptr->q1->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("gpu_after_q2", (Torus *)mem_ptr->q2->ptr, mem_ptr->q2->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
// print_body<Torus>("gpu_after_q3", (Torus *)mem_ptr->q3->ptr, mem_ptr->q3->num_radix_blocks,
// radix_params.big_lwe_dimension, 576460752303423488ULL);
host_addition<Torus>(streams[0], gpu_indexes[0], rem, rem,
r3, rem->num_radix_blocks, 4, 4);
@@ -431,7 +431,7 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
rem->num_radix_blocks = remainder->num_radix_blocks;
copy_radix_ciphertext_slice_async<Torus>(
streams[0], gpu_indexes[0],
remainder, block_index, rem->num_radix_blocks,
remainder, block_index, remainder->num_radix_blocks,
rem, 0, tmp_rem_size);
rem->num_radix_blocks = tmp_rem_size;

View File

@@ -7,6 +7,13 @@
#include "utils/helper_profile.cuh"
#include "utils/kernel_dimensions.cuh"
// Builds a CudaLweCiphertextListFFI descriptor from a radix ciphertext.
//
// The result copies `ptr`, `num_radix_blocks` and `lwe_dimension` from
// `radix`. Only the pointer value is copied, so the descriptor refers to the
// same data as `radix` and owns nothing.
//
// `radix` must be non-null: it is dereferenced unconditionally.
//
// Fields are assigned one by one instead of using designated initializers,
// which are standard C++ only from C++20 onwards and otherwise rely on a
// compiler extension.
inline CudaLweCiphertextListFFI to_lwe_ciphertext_list(CudaRadixCiphertextFFI* radix) {
  CudaLweCiphertextListFFI list{};
  list.ptr = radix->ptr;
  list.num_radix_blocks = radix->num_radix_blocks;
  list.lwe_dimension = radix->lwe_dimension;
  return list;
}
template <typename Torus>
void create_zero_radix_ciphertext_async(cudaStream_t const stream,
uint32_t const gpu_index,

View File

@@ -105,134 +105,6 @@ const _: () = {
ms_input_variance
) - 32usize];
};
/// Rust-side mirror of the C struct `CudaLweCiphertextListFFI`.
///
/// `#[repr(C)]` keeps the field order and padding of the C definition; the
/// constant block that follows checks the layout at compile time.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CudaLweCiphertextListFFI {
/// Untyped pointer to the ciphertext data (memory space not visible here --
/// presumably device memory; TODO confirm).
pub ptr: *mut ffi::c_void,
/// Block count of the list, as named.
pub num_radix_blocks: u32,
/// LWE dimension associated with the ciphertexts in the list, as named.
pub lwe_dimension: u32,
}
// Compile-time layout checks. Each statement indexes a one-element array with
// `actual - expected`; any result other than 0 (including a `usize` underflow)
// fails constant evaluation, so a layout mismatch is a build error.
// Expected layout: size 16, alignment 8, fields at offsets 0, 8 and 12.
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
["Size of CudaLweCiphertextListFFI"]
[::std::mem::size_of::<CudaLweCiphertextListFFI>() - 16usize];
["Alignment of CudaLweCiphertextListFFI"]
[::std::mem::align_of::<CudaLweCiphertextListFFI>() - 8usize];
["Offset of field: CudaLweCiphertextListFFI::ptr"]
[::std::mem::offset_of!(CudaLweCiphertextListFFI, ptr) - 0usize];
["Offset of field: CudaLweCiphertextListFFI::num_radix_blocks"]
[::std::mem::offset_of!(CudaLweCiphertextListFFI, num_radix_blocks) - 8usize];
["Offset of field: CudaLweCiphertextListFFI::lwe_dimension"]
[::std::mem::offset_of!(CudaLweCiphertextListFFI, lwe_dimension) - 12usize];
};
/// Rust-side mirror of the C struct `CudaPackedGlweCiphertextListFFI`.
///
/// `#[repr(C)]` keeps the field order and padding of the C definition; the
/// constant block that follows checks the layout at compile time.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CudaPackedGlweCiphertextListFFI {
/// Untyped pointer to the packed data (memory space not visible here --
/// presumably device memory; TODO confirm).
pub ptr: *mut ffi::c_void,
/// NOTE(review): presumably log2 of the storage modulus -- confirm against
/// the C side.
pub storage_log_modulus: u32,
/// Number of LWE bodies packed into one GLWE, as named.
pub lwe_per_glwe: u32,
/// Total number of LWE bodies across the list, as named.
pub total_lwe_bodies_count: u32,
/// GLWE dimension of the packed ciphertexts, as named.
pub glwe_dimension: u32,
/// Polynomial size of the packed ciphertexts, as named.
pub polynomial_size: u32,
}
// Compile-time layout checks. Each statement indexes a one-element array with
// `actual - expected`; any result other than 0 (including a `usize` underflow)
// fails constant evaluation, so a layout mismatch is a build error.
// Expected layout: size 32, alignment 8, fields at offsets 0, 8, 12, 16, 20
// and 24. The last field ends at byte 28, so the struct carries 4 bytes of
// tail padding to reach a multiple of its alignment.
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
["Size of CudaPackedGlweCiphertextListFFI"]
[::std::mem::size_of::<CudaPackedGlweCiphertextListFFI>() - 32usize];
["Alignment of CudaPackedGlweCiphertextListFFI"]
[::std::mem::align_of::<CudaPackedGlweCiphertextListFFI>() - 8usize];
["Offset of field: CudaPackedGlweCiphertextListFFI::ptr"]
[::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, ptr) - 0usize];
["Offset of field: CudaPackedGlweCiphertextListFFI::storage_log_modulus"]
[::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, storage_log_modulus) - 8usize];
["Offset of field: CudaPackedGlweCiphertextListFFI::lwe_per_glwe"]
[::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, lwe_per_glwe) - 12usize];
["Offset of field: CudaPackedGlweCiphertextListFFI::total_lwe_bodies_count"]
[::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, total_lwe_bodies_count) - 16usize];
["Offset of field: CudaPackedGlweCiphertextListFFI::glwe_dimension"]
[::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, glwe_dimension) - 20usize];
["Offset of field: CudaPackedGlweCiphertextListFFI::polynomial_size"]
[::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, polynomial_size) - 24usize];
};
unsafe extern "C" {
/// Binding for the C symbol `scratch_cuda_integer_compress_radix_ciphertext_64`.
///
/// Takes the stream and GPU-index arrays (`gpu_count` entries each, as
/// named), an out-pointer `mem_ptr`, and the compression / keyswitch
/// parameters; returns a `u64`.
///
/// NOTE(review): by its name this is the setup step that fills `*mem_ptr`
/// for `cuda_integer_compress_radix_ciphertext_64`; the meaning of the
/// returned `u64` is not visible here -- confirm against the C side.
///
/// # Safety
/// Every pointer must satisfy the requirements of the C implementation,
/// which is not shown in these bindings.
pub fn scratch_cuda_integer_compress_radix_ciphertext_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr: *mut *mut i8,
compression_glwe_dimension: u32,
compression_polynomial_size: u32,
lwe_dimension: u32,
ks_level: u32,
ks_base_log: u32,
num_radix_blocks: u32,
message_modulus: u32,
carry_modulus: u32,
pbs_type: PBS_TYPE,
lwe_per_glwe: u32,
allocate_gpu_memory: bool,
) -> u64;
}
unsafe extern "C" {
/// Binding for the C symbol `scratch_cuda_integer_decompress_radix_ciphertext_64`.
///
/// Takes the stream and GPU-index arrays (`gpu_count` entries each, as
/// named), an out-pointer `mem_ptr`, the encryption and compression
/// parameters, the PBS parameters and two allocation flags; returns a `u64`.
///
/// NOTE(review): by its name this is the setup step that fills `*mem_ptr`
/// for `cuda_integer_decompress_radix_ciphertext_64`; the meaning of the
/// returned `u64` is not visible here -- confirm against the C side.
///
/// # Safety
/// Every pointer must satisfy the requirements of the C implementation,
/// which is not shown in these bindings.
pub fn scratch_cuda_integer_decompress_radix_ciphertext_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr: *mut *mut i8,
encryption_glwe_dimension: u32,
encryption_polynomial_size: u32,
compression_glwe_dimension: u32,
compression_polynomial_size: u32,
lwe_dimension: u32,
pbs_level: u32,
pbs_base_log: u32,
num_blocks_to_decompress: u32,
message_modulus: u32,
carry_modulus: u32,
pbs_type: PBS_TYPE,
allocate_gpu_memory: bool,
allocate_ms_array: bool,
) -> u64;
}
unsafe extern "C" {
/// Binding for the C symbol `cuda_integer_compress_radix_ciphertext_64`.
///
/// Reads an LWE list descriptor (`lwe_array_in`, const) and writes through a
/// packed GLWE list descriptor (`glwe_array_out`, mutable). `fp_ksk` is an
/// array of untyped key pointers and `mem_ptr` an opaque buffer pointer.
///
/// NOTE(review): `mem_ptr` is presumably the value produced by
/// `scratch_cuda_integer_compress_radix_ciphertext_64` -- confirm.
///
/// # Safety
/// Every pointer must satisfy the requirements of the C implementation,
/// which is not shown in these bindings.
pub fn cuda_integer_compress_radix_ciphertext_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
glwe_array_out: *mut CudaPackedGlweCiphertextListFFI,
lwe_array_in: *const CudaLweCiphertextListFFI,
fp_ksk: *const *mut ffi::c_void,
mem_ptr: *mut i8,
);
}
unsafe extern "C" {
/// Binding for the C symbol `cuda_integer_decompress_radix_ciphertext_64`.
///
/// Reads a packed GLWE list descriptor (`glwe_in`, const) and an array of
/// `u32` indexes, and writes through an LWE list descriptor
/// (`lwe_array_out`, mutable). `bsks` is an array of untyped key pointers
/// and `mem_ptr` an opaque buffer pointer.
///
/// NOTE(review): `mem_ptr` is presumably the value produced by
/// `scratch_cuda_integer_decompress_radix_ciphertext_64`, and the length of
/// `indexes_array` is not expressed in this signature -- confirm both.
///
/// # Safety
/// Every pointer must satisfy the requirements of the C implementation,
/// which is not shown in these bindings.
pub fn cuda_integer_decompress_radix_ciphertext_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
lwe_array_out: *mut CudaLweCiphertextListFFI,
glwe_in: *const CudaPackedGlweCiphertextListFFI,
indexes_array: *const u32,
bsks: *const *mut ffi::c_void,
mem_ptr: *mut i8,
);
}
unsafe extern "C" {
/// Binding for the C symbol `cleanup_cuda_integer_compress_radix_ciphertext_64`.
///
/// Takes the stream and GPU-index arrays and a pointer-to-pointer
/// `mem_ptr_void`.
///
/// NOTE(review): by its name this releases what the matching `scratch_*`
/// call set up; whether `*mem_ptr_void` is reset afterwards is not visible
/// here -- confirm.
///
/// # Safety
/// Every pointer must satisfy the requirements of the C implementation,
/// which is not shown in these bindings.
pub fn cleanup_cuda_integer_compress_radix_ciphertext_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr_void: *mut *mut i8,
);
}
unsafe extern "C" {
/// Binding for the C symbol `cleanup_cuda_integer_decompress_radix_ciphertext_64`.
///
/// Takes the stream and GPU-index arrays and a pointer-to-pointer
/// `mem_ptr_void`.
///
/// NOTE(review): by its name this releases what the matching `scratch_*`
/// call set up; whether `*mem_ptr_void` is reset afterwards is not visible
/// here -- confirm.
///
/// # Safety
/// Every pointer must satisfy the requirements of the C implementation,
/// which is not shown in these bindings.
pub fn cleanup_cuda_integer_decompress_radix_ciphertext_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr_void: *mut *mut i8,
);
}
// Named values of `SHIFT_OR_ROTATE_TYPE`. The type itself and any further
// values are declared outside this excerpt.
/// `SHIFT_OR_ROTATE_TYPE` value 0: left shift.
pub const SHIFT_OR_ROTATE_TYPE_LEFT_SHIFT: SHIFT_OR_ROTATE_TYPE = 0;
/// `SHIFT_OR_ROTATE_TYPE` value 1: right shift.
pub const SHIFT_OR_ROTATE_TYPE_RIGHT_SHIFT: SHIFT_OR_ROTATE_TYPE = 1;
/// `SHIFT_OR_ROTATE_TYPE` value 2: left rotate.
pub const SHIFT_OR_ROTATE_TYPE_LEFT_ROTATE: SHIFT_OR_ROTATE_TYPE = 2;
@@ -367,6 +239,55 @@ const _: () = {
divisor_has_more_bits_than_numerator
) - 60usize];
};
/// Rust-side mirror of the C struct `CudaLweCiphertextListFFI`.
///
/// `#[repr(C)]` keeps the field order and padding of the C definition; the
/// constant block that follows checks the layout at compile time.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CudaLweCiphertextListFFI {
/// Untyped pointer to the ciphertext data (memory space not visible here --
/// presumably device memory; TODO confirm).
pub ptr: *mut ffi::c_void,
/// Block count of the list, as named.
pub num_radix_blocks: u32,
/// LWE dimension associated with the ciphertexts in the list, as named.
pub lwe_dimension: u32,
}
// Compile-time layout checks. Each statement indexes a one-element array with
// `actual - expected`; any result other than 0 (including a `usize` underflow)
// fails constant evaluation, so a layout mismatch is a build error.
// Expected layout: size 16, alignment 8, fields at offsets 0, 8 and 12.
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
["Size of CudaLweCiphertextListFFI"]
[::std::mem::size_of::<CudaLweCiphertextListFFI>() - 16usize];
["Alignment of CudaLweCiphertextListFFI"]
[::std::mem::align_of::<CudaLweCiphertextListFFI>() - 8usize];
["Offset of field: CudaLweCiphertextListFFI::ptr"]
[::std::mem::offset_of!(CudaLweCiphertextListFFI, ptr) - 0usize];
["Offset of field: CudaLweCiphertextListFFI::num_radix_blocks"]
[::std::mem::offset_of!(CudaLweCiphertextListFFI, num_radix_blocks) - 8usize];
["Offset of field: CudaLweCiphertextListFFI::lwe_dimension"]
[::std::mem::offset_of!(CudaLweCiphertextListFFI, lwe_dimension) - 12usize];
};
/// Rust-side mirror of the C struct `CudaPackedGlweCiphertextListFFI`.
///
/// `#[repr(C)]` keeps the field order and padding of the C definition; the
/// constant block that follows checks the layout at compile time.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CudaPackedGlweCiphertextListFFI {
/// Untyped pointer to the packed data (memory space not visible here --
/// presumably device memory; TODO confirm).
pub ptr: *mut ffi::c_void,
/// NOTE(review): presumably log2 of the storage modulus -- confirm against
/// the C side.
pub storage_log_modulus: u32,
/// Number of LWE bodies packed into one GLWE, as named.
pub lwe_per_glwe: u32,
/// Total number of LWE bodies across the list, as named.
pub total_lwe_bodies_count: u32,
/// GLWE dimension of the packed ciphertexts, as named.
pub glwe_dimension: u32,
/// Polynomial size of the packed ciphertexts, as named.
pub polynomial_size: u32,
}
// Compile-time layout checks. Each statement indexes a one-element array with
// `actual - expected`; any result other than 0 (including a `usize` underflow)
// fails constant evaluation, so a layout mismatch is a build error.
// Expected layout: size 32, alignment 8, fields at offsets 0, 8, 12, 16, 20
// and 24. The last field ends at byte 28, so the struct carries 4 bytes of
// tail padding to reach a multiple of its alignment.
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
["Size of CudaPackedGlweCiphertextListFFI"]
[::std::mem::size_of::<CudaPackedGlweCiphertextListFFI>() - 32usize];
["Alignment of CudaPackedGlweCiphertextListFFI"]
[::std::mem::align_of::<CudaPackedGlweCiphertextListFFI>() - 8usize];
["Offset of field: CudaPackedGlweCiphertextListFFI::ptr"]
[::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, ptr) - 0usize];
["Offset of field: CudaPackedGlweCiphertextListFFI::storage_log_modulus"]
[::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, storage_log_modulus) - 8usize];
["Offset of field: CudaPackedGlweCiphertextListFFI::lwe_per_glwe"]
[::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, lwe_per_glwe) - 12usize];
["Offset of field: CudaPackedGlweCiphertextListFFI::total_lwe_bodies_count"]
[::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, total_lwe_bodies_count) - 16usize];
["Offset of field: CudaPackedGlweCiphertextListFFI::glwe_dimension"]
[::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, glwe_dimension) - 20usize];
["Offset of field: CudaPackedGlweCiphertextListFFI::polynomial_size"]
[::std::mem::offset_of!(CudaPackedGlweCiphertextListFFI, polynomial_size) - 24usize];
};
unsafe extern "C" {
pub fn scratch_cuda_apply_univariate_lut_kb_64(
streams: *const *mut ffi::c_void,
@@ -1934,6 +1855,85 @@ unsafe extern "C" {
mem_ptr_void: *mut *mut i8,
);
}
unsafe extern "C" {
/// Binding for the C symbol `scratch_cuda_integer_compress_radix_ciphertext_64`.
///
/// Takes the stream and GPU-index arrays (`gpu_count` entries each, as
/// named), an out-pointer `mem_ptr`, and the compression / keyswitch
/// parameters; returns a `u64`.
///
/// NOTE(review): by its name this is the setup step that fills `*mem_ptr`
/// for `cuda_integer_compress_radix_ciphertext_64`; the meaning of the
/// returned `u64` is not visible here -- confirm against the C side.
///
/// # Safety
/// Every pointer must satisfy the requirements of the C implementation,
/// which is not shown in these bindings.
pub fn scratch_cuda_integer_compress_radix_ciphertext_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr: *mut *mut i8,
compression_glwe_dimension: u32,
compression_polynomial_size: u32,
lwe_dimension: u32,
ks_level: u32,
ks_base_log: u32,
num_radix_blocks: u32,
message_modulus: u32,
carry_modulus: u32,
pbs_type: PBS_TYPE,
lwe_per_glwe: u32,
allocate_gpu_memory: bool,
) -> u64;
}
unsafe extern "C" {
/// Binding for the C symbol `scratch_cuda_integer_decompress_radix_ciphertext_64`.
///
/// Takes the stream and GPU-index arrays (`gpu_count` entries each, as
/// named), an out-pointer `mem_ptr`, the encryption and compression
/// parameters, the PBS parameters and two allocation flags; returns a `u64`.
///
/// NOTE(review): by its name this is the setup step that fills `*mem_ptr`
/// for `cuda_integer_decompress_radix_ciphertext_64`; the meaning of the
/// returned `u64` is not visible here -- confirm against the C side.
///
/// # Safety
/// Every pointer must satisfy the requirements of the C implementation,
/// which is not shown in these bindings.
pub fn scratch_cuda_integer_decompress_radix_ciphertext_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr: *mut *mut i8,
encryption_glwe_dimension: u32,
encryption_polynomial_size: u32,
compression_glwe_dimension: u32,
compression_polynomial_size: u32,
lwe_dimension: u32,
pbs_level: u32,
pbs_base_log: u32,
num_blocks_to_decompress: u32,
message_modulus: u32,
carry_modulus: u32,
pbs_type: PBS_TYPE,
allocate_gpu_memory: bool,
allocate_ms_array: bool,
) -> u64;
}
unsafe extern "C" {
/// Binding for the C symbol `cuda_integer_compress_radix_ciphertext_64`.
///
/// Reads an LWE list descriptor (`lwe_array_in`, const) and writes through a
/// packed GLWE list descriptor (`glwe_array_out`, mutable). `fp_ksk` is an
/// array of untyped key pointers and `mem_ptr` an opaque buffer pointer.
///
/// NOTE(review): `mem_ptr` is presumably the value produced by
/// `scratch_cuda_integer_compress_radix_ciphertext_64` -- confirm.
///
/// # Safety
/// Every pointer must satisfy the requirements of the C implementation,
/// which is not shown in these bindings.
pub fn cuda_integer_compress_radix_ciphertext_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
glwe_array_out: *mut CudaPackedGlweCiphertextListFFI,
lwe_array_in: *const CudaLweCiphertextListFFI,
fp_ksk: *const *mut ffi::c_void,
mem_ptr: *mut i8,
);
}
unsafe extern "C" {
/// Binding for the C symbol `cuda_integer_decompress_radix_ciphertext_64`.
///
/// Reads a packed GLWE list descriptor (`glwe_in`, const) and an array of
/// `u32` indexes, and writes through an LWE list descriptor
/// (`lwe_array_out`, mutable). `bsks` is an array of untyped key pointers
/// and `mem_ptr` an opaque buffer pointer.
///
/// NOTE(review): `mem_ptr` is presumably the value produced by
/// `scratch_cuda_integer_decompress_radix_ciphertext_64`, and the length of
/// `indexes_array` is not expressed in this signature -- confirm both.
///
/// # Safety
/// Every pointer must satisfy the requirements of the C implementation,
/// which is not shown in these bindings.
pub fn cuda_integer_decompress_radix_ciphertext_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
lwe_array_out: *mut CudaLweCiphertextListFFI,
glwe_in: *const CudaPackedGlweCiphertextListFFI,
indexes_array: *const u32,
bsks: *const *mut ffi::c_void,
mem_ptr: *mut i8,
);
}
unsafe extern "C" {
/// Binding for the C symbol `cleanup_cuda_integer_compress_radix_ciphertext_64`.
///
/// Takes the stream and GPU-index arrays and a pointer-to-pointer
/// `mem_ptr_void`.
///
/// NOTE(review): by its name this releases what the matching `scratch_*`
/// call set up; whether `*mem_ptr_void` is reset afterwards is not visible
/// here -- confirm.
///
/// # Safety
/// Every pointer must satisfy the requirements of the C implementation,
/// which is not shown in these bindings.
pub fn cleanup_cuda_integer_compress_radix_ciphertext_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr_void: *mut *mut i8,
);
}
unsafe extern "C" {
/// Binding for the C symbol `cleanup_cuda_integer_decompress_radix_ciphertext_64`.
///
/// Takes the stream and GPU-index arrays and a pointer-to-pointer
/// `mem_ptr_void`.
///
/// NOTE(review): by its name this releases what the matching `scratch_*`
/// call set up; whether `*mem_ptr_void` is reset afterwards is not visible
/// here -- confirm.
///
/// # Safety
/// Every pointer must satisfy the requirements of the C implementation,
/// which is not shown in these bindings.
pub fn cleanup_cuda_integer_decompress_radix_ciphertext_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr_void: *mut *mut i8,
);
}
/// `KS_TYPE` value 0: big-to-small (presumably the keyswitch direction --
/// confirm against the C enum).
pub const KS_TYPE_BIG_TO_SMALL: KS_TYPE = 0;
/// `KS_TYPE` value 1: small-to-big.
pub const KS_TYPE_SMALL_TO_BIG: KS_TYPE = 1;
/// `KS_TYPE` is carried across the FFI boundary as a C `unsigned int`.
pub type KS_TYPE = ffi::c_uint;