Mirror of https://github.com/zama-ai/concrete.git, synced 2026-02-09 12:15:09 -05:00
refactor(cuda): remove SharedMemory
@@ -40,11 +40,6 @@ public:
  __device__ VectorPolynomial(T *data, uint32_t num_polynomials)
      : m_data(data), m_num_polynomials(num_polynomials) {}

  __device__ VectorPolynomial(SharedMemory &shmem, uint32_t num_polynomials)
      : m_num_polynomials(num_polynomials) {
    shmem.get_allocation(&m_data, m_num_polynomials * params::degree);
  }

  __device__ VectorPolynomial<T, params> get_chunk(int chunk_num,
                                                   int chunk_size) {
    int pos = chunk_num * chunk_size;
@@ -109,10 +104,6 @@ public:
  __device__ Polynomial(char *memory, uint32_t degree)
      : coefficients((T *)memory), degree(degree) {}

  __device__ Polynomial(SharedMemory &shmem, uint32_t degree) : degree(degree) {
    shmem.get_allocation(&this->coefficients, degree);
  }

  __host__ Polynomial(DeviceMemory &dmem, uint32_t degree, int device)
      : degree(degree) {
    dmem.get_allocation(&this->coefficients, params::degree, device);
@@ -396,30 +387,8 @@ public:
  __device__ Vector(T *elements, uint32_t size)
      : m_data(elements), m_size(size) {}

  template <typename V>
  __device__ Vector(SharedMemory &shmem, V src, int size) : m_size(size) {
    shmem.get_allocation(&m_data, m_size);
    int tid = threadIdx.x;
#pragma unroll
    for (int i = 0; i < params::opt && tid < m_size; i++) {
      if (tid > m_size)
        continue;
      m_data[tid] = src[tid];
      tid += params::degree / params::opt;
    }
  }

  __device__ Vector(SharedMemory &shmem, uint32_t size) : m_size(size) {
    shmem.get_allocation(&m_data, m_size);
  }

  __host__ Vector() {}

  __host__ Vector(DeviceMemory &dmem, uint32_t size, int device)
      : m_size(size) {
    dmem.get_allocation(&m_data, m_size, device);
  }

  __device__ T &operator[](int i) { return m_data[i]; }

  __device__ Vector<T, params> get_chunk(int chunk_num, int chunk_size) {
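As an aside, the two constructors removed above were the only places Vector drew its backing storage from SharedMemory; the raw-pointer constructor at the top of the hunk survives. A minimal sketch of how a kernel can still place a Vector in dynamic shared memory through that remaining constructor follows; the kernel, its launch configuration, and the strided copy are hypothetical illustrations, not taken from this commit:

#include <cstdint>

// Hypothetical usage sketch, not part of the commit. Assumes the Vector<T,
// params> template from this file, reduced to its raw-pointer constructor.
template <typename T, class params>
__global__ void add_in_shared(const T *in, T *out, uint32_t size) {
  extern __shared__ char shmem_raw[];  // dynamic shared memory from the launch

  // Hand-computed placement: the offset bookkeeping SharedMemory used to do.
  T *buf = reinterpret_cast<T *>(shmem_raw);
  Vector<T, params> v(buf, size);  // raw-pointer constructor kept by the commit

  // Strided copy into shared memory, then accumulate into the output.
  for (uint32_t tid = threadIdx.x; tid < size; tid += blockDim.x)
    v[tid] = in[tid];
  __syncthreads();
  for (uint32_t tid = threadIdx.x; tid < size; tid += blockDim.x)
    out[tid] += v[tid];
}

// Launch example: reserve size * sizeof(T) bytes of dynamic shared memory.
// add_in_shared<T, params><<<blocks, threads, size * sizeof(T)>>>(in, out, size);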
@@ -9,19 +9,6 @@
#include <tuple>
#include <vector>

class SharedMemory {
public:
  char *m_memory_block;
  int m_last_byte;

  __device__ SharedMemory(char *ptr) : m_memory_block(ptr), m_last_byte(0) {}

  template <typename T> __device__ void get_allocation(T **ptr, int elements) {
    *ptr = (T *)(&this->m_memory_block[m_last_byte]);
    this->m_last_byte += elements * sizeof(T);
  }
};

class DeviceMemory {
public:
  std::vector<std::tuple<void *, int>> m_allocated;
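For context, the SharedMemory class deleted in this last hunk was a small bump allocator over a caller-provided block of dynamic shared memory: get_allocation returns a typed pointer at the current offset and advances the offset by elements * sizeof(T), with no alignment handling. A short sketch of the pattern it supported, with the class copied from the hunk above and a hypothetical kernel and element counts added for illustration:

#include <cstdint>

// SharedMemory as it appeared before this commit (copied from the hunk above):
// a bump allocator handing out consecutive slices of one shared-memory block.
class SharedMemory {
public:
  char *m_memory_block;
  int m_last_byte;

  __device__ SharedMemory(char *ptr) : m_memory_block(ptr), m_last_byte(0) {}

  template <typename T> __device__ void get_allocation(T **ptr, int elements) {
    *ptr = (T *)(&this->m_memory_block[m_last_byte]);
    this->m_last_byte += elements * sizeof(T);
  }
};

// Hypothetical kernel showing the usage pattern the helper enabled.
__global__ void demo_kernel(uint32_t degree) {
  extern __shared__ char raw[];  // sized at launch: 2 * degree * sizeof(double)
  SharedMemory shmem(raw);

  double *poly_a;
  double *poly_b;
  shmem.get_allocation(&poly_a, degree);  // bytes [0, degree * 8)
  shmem.get_allocation(&poly_b, degree);  // bytes [degree * 8, 2 * degree * 8)
  // poly_a and poly_b now view disjoint slices of the same shared block.
}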