feat: support GPU (bootstrapping)

This commit is contained in:
youben11
2022-07-21 14:45:28 +01:00
committed by Ayoub Benaissa
parent a487b03699
commit d169a27fc0
26 changed files with 715 additions and 47 deletions

View File

@@ -0,0 +1,18 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
// for license information.
#ifndef ZAMALANG_CONVERSION_BCONCRETETOCAPI_PASS_H_
#define ZAMALANG_CONVERSION_BCONCRETETOCAPI_PASS_H_
#include "mlir/Pass/Pass.h"
namespace mlir {
namespace concretelang {
/// Creates the conversion pass that lowers `BConcrete` dialect operations to
/// CAPI calls.
///
/// The pass is anchored on `ModuleOp`. The TableGen pass definition
/// `BConcreteToCAPI` (command-line argument `bconcrete-to-capi`) names this
/// function as its constructor.
///
/// \return a newly created pass instance, owned by the caller.
std::unique_ptr<OperationPass<ModuleOp>> createConvertBConcreteToCAPIPass();
} // namespace concretelang
} // namespace mlir
#endif

View File

@@ -0,0 +1,18 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
// for license information.
#ifndef ZAMALANG_CONVERSION_CONCRETETOGPU_PASS_H_
#define ZAMALANG_CONVERSION_CONCRETETOGPU_PASS_H_
#include "mlir/Pass/Pass.h"
namespace mlir {
namespace concretelang {
/// Creates the pass that converts `Concrete` dialect operations to their GPU
/// form.
///
/// The pass is anchored on `ModuleOp`. The TableGen pass definition
/// `ConcreteToGPU` (command-line argument `concrete-to-gpu`) names this
/// function as its constructor.
///
/// \return a newly created pass instance, owned by the caller.
std::unique_ptr<OperationPass<ModuleOp>> createConvertConcreteToGPUPass();
} // namespace concretelang
} // namespace mlir
#endif

View File

@@ -13,7 +13,9 @@
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "concretelang/Conversion/BConcreteToCAPI/Pass.h"
#include "concretelang/Conversion/ConcreteToBConcrete/Pass.h"
#include "concretelang/Conversion/ConcreteToGPU/Pass.h"
#include "concretelang/Conversion/FHETensorOpsToLinalg/Pass.h"
#include "concretelang/Conversion/FHEToTFHE/Pass.h"
#include "concretelang/Conversion/LinalgExtras/Passes.h"

View File

@@ -47,6 +47,20 @@ def ConcreteToBConcrete : Pass<"concrete-to-bconcrete", "mlir::ModuleOp"> {
let dependentDialects = ["mlir::linalg::LinalgDialect", "mlir::concretelang::Concrete::ConcreteDialect", "mlir::concretelang::BConcrete::BConcreteDialect"];
}
// Declarative definition of the `bconcrete-to-capi` pass: a pass anchored on
// `mlir::ModuleOp` that lowers BConcrete dialect operations to CAPI calls.
def BConcreteToCAPI : Pass<"bconcrete-to-capi", "mlir::ModuleOp"> {
let summary = "Lowers operations from the BConcrete dialect to CAPI calls";
let description = [{ Lowers operations from the BConcrete dialect to CAPI calls }];
// C++ expression used to instantiate the pass.
let constructor = "mlir::concretelang::createConvertBConcreteToCAPIPass()";
// Dialects declared as dependencies of this pass.
// NOTE(review): only BConcrete is listed; if the lowering creates operations
// from other dialects (e.g. call operations), they should be listed here too —
// confirm against the pass implementation.
let dependentDialects = ["mlir::concretelang::BConcrete::BConcreteDialect"];
}
// Declarative definition of the `concrete-to-gpu` pass: a pass anchored on
// `mlir::ModuleOp` that transforms Concrete dialect operations to GPU.
def ConcreteToGPU : Pass<"concrete-to-gpu", "mlir::ModuleOp"> {
let summary = "Transforms operations in the Concrete dialect to GPU";
let description = [{ Transforms operations in the Concrete dialect to GPU }];
// C++ expression used to instantiate the pass.
let constructor = "mlir::concretelang::createConvertConcreteToGPUPass()";
// Dialects declared as dependencies of this pass.
let dependentDialects = ["mlir::concretelang::Concrete::ConcreteDialect"];
}
def MLIRLowerableDialectsToLLVM : Pass<"mlir-lowerable-dialects-to-llvm", "mlir::ModuleOp"> {
let summary = "Lowers operations from MLIR lowerable dialects to LLVM";
let constructor = "mlir::concretelang::createConvertMLIRLowerableDialectsToLLVMPass()";

View File

@@ -9,3 +9,9 @@ mlir::LogicalResult insertForwardDeclaration(mlir::Operation *op,
mlir::OpBuilder &rewriter,
llvm::StringRef funcName,
mlir::FunctionType funcType);
/// \brief Returns the value of the context argument of the function enclosing
/// an operation.
///
/// NOTE(review): the behavior when `op` has no enclosing function, or when the
/// enclosing function has no context argument, is not visible from this
/// declaration — confirm against the implementation before relying on it.
///
/// \param op operation from which the search for the enclosing function
/// starts
/// \return mlir::Value the context value
mlir::Value getContextArgument(mlir::Operation *op);

View File

@@ -6,6 +6,7 @@
#ifndef ZAMALANG_DIALECT_BConcrete_BConcrete_OPS_H
#define ZAMALANG_DIALECT_BConcrete_BConcrete_OPS_H
#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
#include <mlir/IR/Builders.h>
#include <mlir/IR/BuiltinOps.h>
#include <mlir/IR/BuiltinTypes.h>

View File

@@ -5,6 +5,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/IR/BuiltinTypes.td"
include "mlir/Dialect/MemRef/IR/MemRefBase.td"
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
include "concretelang/Dialect/BConcrete/IR/BConcreteDialect.td"
include "concretelang/Dialect/Concrete/IR/ConcreteTypes.td"
@@ -157,4 +158,27 @@ def BConcrete_AwaitFutureOp :
let results = (outs 1DTensorOf<[I64]>:$result);
}
// `bootstrap_lwe_gpu_buffer`: GPU bootstrap operation of the BConcrete dialect.
//
// Operands:
//   - input_ciphertext: 1D tensor of i64
//   - table:            1D tensor of i64
//   - inputLweDim, polySize, level, baseLog: i32 values
//   - bsk:              LLVM pointer to i64
// Result: a 1D tensor of i64.
//
// NOTE(review): presumably the buffer-level counterpart of the Concrete
// dialect's `bootstrap_lwe_gpu`, with `bsk` being the pointer produced by
// `move_bsk_to_gpu` — confirm against the lowering.
def BConcrete_BootstrapLweGPUBufferOp : BConcrete_Op<"bootstrap_lwe_gpu_buffer"> {
let arguments = (ins
1DTensorOf<[I64]>:$input_ciphertext,
1DTensorOf<[I64]>:$table,
I32:$inputLweDim,
I32:$polySize,
I32:$level,
I32:$baseLog,
LLVM_PointerTo<I64>:$bsk
);
let results = (outs 1DTensorOf<[I64]>:$result);
}
// `move_bsk_to_gpu`: takes no operands and yields an LLVM pointer to i64 named
// `bsk`.
// NOTE(review): presumably the pointer designates the bootstrapping key once
// copied to GPU memory, and must later be released with `free_bsk_from_gpu` —
// confirm against the lowering.
def BConcrete_MoveBskToGPUOp : BConcrete_Op<"move_bsk_to_gpu"> {
let arguments = (ins);
let results = (outs LLVM_PointerTo<I64>:$bsk);
}
// `free_bsk_from_gpu`: consumes an LLVM pointer to i64 (`bsk`) and produces no
// result.
// NOTE(review): presumably `bsk` must be a pointer previously returned by
// `move_bsk_to_gpu` — confirm against the lowering.
def BConcrete_FreeBskFromGPUOp : BConcrete_Op<"free_bsk_from_gpu"> {
let arguments = (ins LLVM_PointerTo<I64>:$bsk);
let results = (outs);
}
#endif

View File

@@ -52,7 +52,7 @@ def Concrete_NegateLweCiphertextOp : Concrete_Op<"negate_lwe_ciphertext"> {
let results = (outs Concrete_LweCiphertextType:$result);
}
def Concrete_GlweFromTable : Concrete_Op<"glwe_from_table"> {
def Concrete_GlweFromTable : Concrete_Op<"glwe_from_table", [NoSideEffect]> {
let summary = "Creates a GLWE ciphertext which is the trivial encrytion of a the input table interpreted as a polynomial (to use later in a bootstrap)";
let arguments = (ins 1DTensorOf<[I64]>:$table);
@@ -71,6 +71,35 @@ def Concrete_BootstrapLweOp : Concrete_Op<"bootstrap_lwe"> {
let results = (outs Concrete_LweCiphertextType:$result);
}
// `bootstrap_lwe_gpu`: bootstraps an LWE ciphertext on GPU using a lookup
// table.
//
// Operands:
//   - input_ciphertext: LWE ciphertext
//   - table:            1D tensor of i64 (the lookup table)
//   - inputLweDim, polySize, level, baseLog: i32 values
//   - bsk:              bootstrapping key in GPU memory (`Concrete_GPUBsk`)
// Result: an LWE ciphertext.
def Concrete_BootstrapLweGPUOp : Concrete_Op<"bootstrap_lwe_gpu"> {
let summary = "Bootstrap an LWE ciphertext in GPU using a lookup table";
let arguments = (ins
Concrete_LweCiphertextType:$input_ciphertext,
1DTensorOf<[I64]>:$table,
I32:$inputLweDim,
I32:$polySize,
I32:$level,
I32:$baseLog,
Concrete_GPUBsk:$bsk
);
let results = (outs Concrete_LweCiphertextType:$result);
}
// `move_bsk_to_gpu`: takes no operands and yields a `Concrete_GPUBsk` value,
// i.e. a bootstrapping key in GPU memory.
// NOTE(review): the op has no operand naming the source key; presumably it is
// obtained from the runtime context during lowering — confirm.
def Concrete_MoveBskToGPUOp : Concrete_Op<"move_bsk_to_gpu"> {
let summary = "Move bsk to GPU";
let arguments = (ins);
let results = (outs Concrete_GPUBsk:$bsk);
}
// `free_bsk_from_gpu`: consumes a `Concrete_GPUBsk` value and produces no
// result; frees the bootstrapping key's GPU memory.
def Concrete_FreeBskFromGPUOp : Concrete_Op<"free_bsk_from_gpu"> {
let summary = "Free bsk memory from GPU";
let arguments = (ins Concrete_GPUBsk:$bsk);
let results = (outs);
}
def Concrete_KeySwitchLweOp : Concrete_Op<"keyswitch_lwe"> {
let summary = "Keyswitches a LWE ciphertext";

View File

@@ -93,4 +93,14 @@ def Concrete_Context : Concrete_Type<"Context"> {
}];
}
// Type of a bootstrapping key (bsk) residing in GPU memory, printed with the
// `gpu_bsk` mnemonic. It is the result type of `Concrete_MoveBskToGPUOp` and an
// operand type of `Concrete_BootstrapLweGPUOp` and `Concrete_FreeBskFromGPUOp`.
def Concrete_GPUBsk : Concrete_Type<"GPUBsk"> {
let mnemonic = "gpu_bsk";
let summary = "A bsk in GPU";
let description = [{
A bootstrapping key in GPU memory
}];
}
#endif

View File

@@ -105,6 +105,87 @@ void memref_copy_one_rank(uint64_t *src_allocated, uint64_t *src_aligned,
uint64_t src_stride, uint64_t *dst_allocated,
uint64_t *dst_aligned, uint64_t dst_offset,
uint64_t dst_size, uint64_t dst_stride);
}
/// \brief Run bootstrapping on GPU.
///
/// It handles memory copy of the different arguments from CPU to GPU, and
/// freeing memory, except for the bootstrapping key, which should already be in
/// GPU.
///
/// Three buffers are each passed as five scalars with the prefixes `out_`,
/// `ct0_` and `tlu_`.
/// NOTE(review): these presumably follow MLIR's expanded rank-1 memref
/// descriptor (allocated pointer, aligned pointer, offset, size, stride), with
/// `ct0` being the input ciphertext and `tlu` the lookup table — confirm
/// against the implementation.
///
/// \param out_allocated allocated pointer of the output buffer
/// \param out_aligned aligned pointer of the output buffer
/// \param out_offset offset of the output buffer
/// \param out_size size of the output buffer
/// \param out_stride stride of the output buffer
/// \param ct0_allocated allocated pointer of the `ct0` buffer
/// \param ct0_aligned aligned pointer of the `ct0` buffer
/// \param ct0_offset offset of the `ct0` buffer
/// \param ct0_size size of the `ct0` buffer
/// \param ct0_stride stride of the `ct0` buffer
/// \param tlu_allocated allocated pointer of the `tlu` buffer
/// \param tlu_aligned aligned pointer of the `tlu` buffer
/// \param tlu_offset offset of the `tlu` buffer
/// \param tlu_size size of the `tlu` buffer
/// \param tlu_stride stride of the `tlu` buffer
/// \param input_lwe_dim LWE input dimension
/// \param poly_size polynomial size
/// \param level level
/// \param base_log base log
/// \param bsk pointer to bsk on GPU
void memref_bootstrap_lwe_cuda_u64(
uint64_t *out_allocated, uint64_t *out_aligned, uint64_t out_offset,
uint64_t out_size, uint64_t out_stride, uint64_t *ct0_allocated,
uint64_t *ct0_aligned, uint64_t ct0_offset, uint64_t ct0_size,
uint64_t ct0_stride, uint64_t *tlu_allocated, uint64_t *tlu_aligned,
uint64_t tlu_offset, uint64_t tlu_size, uint64_t tlu_stride,
uint32_t input_lwe_dim, uint32_t poly_size, uint32_t level,
uint32_t base_log, void *bsk);
/// \brief Copy ciphertext from CPU to GPU using a single stream.
///
/// It handles memory allocation on GPU.
///
/// NOTE(review): nothing in this declaration indicates that the returned GPU
/// memory is released automatically; presumably the caller frees it with
/// `free_from_gpu` — confirm. The `ct_*` parameters presumably follow MLIR's
/// expanded rank-1 memref descriptor — confirm.
///
/// \param ct_allocated allocated pointer of the ciphertext buffer
/// \param ct_aligned aligned pointer of the ciphertext buffer
/// \param ct_offset offset of the ciphertext buffer
/// \param ct_size size of the ciphertext buffer
/// \param ct_stride stride of the ciphertext buffer
/// \param gpu_idx index of the GPU to use
/// \return void* pointer to the GPU ciphertext
void *move_ct_to_gpu(uint64_t *ct_allocated, uint64_t *ct_aligned,
uint64_t ct_offset, uint64_t ct_size, uint64_t ct_stride,
uint32_t gpu_idx);
/// \brief Copy ciphertext from GPU to CPU using a single stream.
///
/// Memory on GPU won't be freed after the copy.
///
/// NOTE(review): the `out_*` parameters presumably follow MLIR's expanded
/// rank-1 memref descriptor — confirm.
///
/// \param out_allocated allocated pointer of the destination (CPU) buffer
/// \param out_aligned aligned pointer of the destination (CPU) buffer
/// \param out_offset offset of the destination (CPU) buffer
/// \param out_size size of the destination (CPU) buffer
/// \param out_stride stride of the destination (CPU) buffer
/// \param ct_gpu pointer to the ciphertext on GPU
/// \param size presumably the amount of data to copy from `ct_gpu`;
/// NOTE(review): the unit (bytes vs. 64-bit elements) is not visible from this
/// declaration — confirm
/// \param gpu_idx index of the GPU to use
void move_ct_to_cpu(uint64_t *out_allocated, uint64_t *out_aligned,
uint64_t out_offset, uint64_t out_size, uint64_t out_stride,
void *ct_gpu, size_t size, uint32_t gpu_idx);
/// \brief Copy bootstrapping key from CPU to GPU using a single stream.
///
/// It handles memory allocation on GPU.
///
/// NOTE(review): nothing in this declaration indicates that the returned GPU
/// memory is released automatically; presumably the caller frees it with
/// `free_from_gpu` — confirm.
///
/// \param context runtime context; presumably the source of the bootstrapping
/// key to copy — confirm
/// \param gpu_idx index of the GPU to use
/// \return void* pointer to the GPU bsk
void *move_bsk_to_gpu(mlir::concretelang::RuntimeContext *context,
uint32_t gpu_idx);
/// \brief Free GPU memory.
///
/// NOTE(review): presumably intended for pointers returned by
/// `move_ct_to_gpu` and `move_bsk_to_gpu` — confirm.
///
/// \param gpu_ptr pointer to the GPU memory to free
/// \param gpu_idx index of the GPU to use
void free_from_gpu(void *gpu_ptr, uint32_t gpu_idx);
}
#endif

View File

@@ -54,6 +54,8 @@ struct CompilationOptions {
bool dataflowParallelize;
bool asyncOffload;
bool optimizeConcrete;
/// Use the GPU during execution by generating GPU operations where possible.
bool useGPU;
llvm::Optional<std::vector<int64_t>> fhelinalgTileSizes;
llvm::Optional<std::string> clientParametersFuncName;
@@ -64,7 +66,7 @@ struct CompilationOptions {
: v0FHEConstraints(llvm::None), verifyDiagnostics(false),
autoParallelize(false), loopParallelize(false),
dataflowParallelize(false), asyncOffload(false), optimizeConcrete(true),
clientParametersFuncName(llvm::None),
useGPU(false), clientParametersFuncName(llvm::None),
optimizerConfig(optimizer::DEFAULT_CONFIG){};
CompilationOptions(std::string funcname) : CompilationOptions() {

View File

@@ -57,6 +57,10 @@ mlir::LogicalResult asyncOffload(mlir::MLIRContext &context,
mlir::ModuleOp &module,
std::function<bool(mlir::Pass *)> enablePass);
/// Transforms the Concrete operations of `module` to GPU.
///
/// NOTE(review): presumably runs the pass created by
/// `createConvertConcreteToGPUPass()` — confirm against the definition.
///
/// \param context MLIR context associated with the module
/// \param module module to transform
/// \param enablePass predicate taking a pass; presumably decides whether that
/// pass is actually run — confirm
/// \return the `mlir::LogicalResult` reporting success or failure
mlir::LogicalResult
transformsConcreteToGPU(mlir::MLIRContext &context, mlir::ModuleOp &module,
std::function<bool(mlir::Pass *)> enablePass);
/// Lowers the BConcrete operations of `module`.
///
/// NOTE(review): the exact target of the lowering ("Std" in the name) and the
/// passes involved are not visible from this declaration — confirm against the
/// definition.
///
/// \param context MLIR context associated with the module
/// \param module module to lower
/// \param enablePass predicate taking a pass; presumably decides whether that
/// pass is actually run — confirm
/// \return the `mlir::LogicalResult` reporting success or failure
mlir::LogicalResult
lowerBConcreteToStd(mlir::MLIRContext &context, mlir::ModuleOp &module,
std::function<bool(mlir::Pass *)> enablePass);