feat: support GPU (bootstrapping)
@@ -55,7 +55,19 @@ include_directories(${CONCRETE_FFI_RELEASE})
add_library(Concrete STATIC IMPORTED)
set_target_properties(Concrete PROPERTIES IMPORTED_LOCATION ${CONCRETE_FFI_RELEASE}/libconcrete_core_ffi.a)

# -------------------------------------------------------------------------------
#--------------------------------------------------------------------------------
# Concrete Cuda Configuration
#--------------------------------------------------------------------------------
option(CONCRETELANG_CUDA_SUPPORT "Support Concrete CUDA Execution." OFF)
if(CONCRETELANG_CUDA_SUPPORT)
  message(STATUS "Building with Concrete CUDA execution support")
  include_directories(${CONCRETE_CORE_PATH}/concrete-cuda/cuda/include)
  add_library(ConcreteCUDA STATIC IMPORTED)
  set_target_properties(ConcreteCUDA PROPERTIES IMPORTED_LOCATION ${CONCRETE_CORE_PATH}/concrete-cuda/cuda/build/src/libconcrete_cuda.a)
  add_compile_options(-DCONCRETELANG_CUDA_SUPPORT)
endif()

#--------------------------------------------------------------------------------
# Python Configuration
# -------------------------------------------------------------------------------
option(CONCRETELANG_BINDINGS_PYTHON_ENABLED "Enables ConcreteLang Python bindings." ON)
@@ -0,0 +1,18 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
// for license information.

#ifndef ZAMALANG_CONVERSION_BCONCRETETOCAPI_PASS_H_
#define ZAMALANG_CONVERSION_BCONCRETETOCAPI_PASS_H_

#include "mlir/Pass/Pass.h"

namespace mlir {
namespace concretelang {
/// Create a pass to convert `BConcrete` dialect to CAPI calls.
std::unique_ptr<OperationPass<ModuleOp>> createConvertBConcreteToCAPIPass();
} // namespace concretelang
} // namespace mlir

#endif
@@ -0,0 +1,18 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
// for license information.

#ifndef ZAMALANG_CONVERSION_CONCRETETOGPU_PASS_H_
#define ZAMALANG_CONVERSION_CONCRETETOGPU_PASS_H_

#include "mlir/Pass/Pass.h"

namespace mlir {
namespace concretelang {
/// Create a pass to convert `Concrete` operations to GPU.
std::unique_ptr<OperationPass<ModuleOp>> createConvertConcreteToGPUPass();
} // namespace concretelang
} // namespace mlir

#endif
@@ -13,7 +13,9 @@
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"

#include "concretelang/Conversion/BConcreteToCAPI/Pass.h"
#include "concretelang/Conversion/ConcreteToBConcrete/Pass.h"
#include "concretelang/Conversion/ConcreteToGPU/Pass.h"
#include "concretelang/Conversion/FHETensorOpsToLinalg/Pass.h"
#include "concretelang/Conversion/FHEToTFHE/Pass.h"
#include "concretelang/Conversion/LinalgExtras/Passes.h"
@@ -47,6 +47,20 @@ def ConcreteToBConcrete : Pass<"concrete-to-bconcrete", "mlir::ModuleOp"> {
  let dependentDialects = ["mlir::linalg::LinalgDialect", "mlir::concretelang::Concrete::ConcreteDialect", "mlir::concretelang::BConcrete::BConcreteDialect"];
}

def BConcreteToCAPI : Pass<"bconcrete-to-capi", "mlir::ModuleOp"> {
  let summary = "Lowers operations from the BConcrete dialect to CAPI calls";
  let description = [{ Lowers operations from the BConcrete dialect to CAPI calls }];
  let constructor = "mlir::concretelang::createConvertBConcreteToCAPIPass()";
  let dependentDialects = ["mlir::concretelang::BConcrete::BConcreteDialect"];
}

def ConcreteToGPU : Pass<"concrete-to-gpu", "mlir::ModuleOp"> {
  let summary = "Transforms operations in the Concrete dialect to GPU";
  let description = [{ Transforms operations in the Concrete dialect to GPU }];
  let constructor = "mlir::concretelang::createConvertConcreteToGPUPass()";
  let dependentDialects = ["mlir::concretelang::Concrete::ConcreteDialect"];
}

def MLIRLowerableDialectsToLLVM : Pass<"mlir-lowerable-dialects-to-llvm", "mlir::ModuleOp"> {
  let summary = "Lowers operations from MLIR lowerable dialects to LLVM";
  let constructor = "mlir::concretelang::createConvertMLIRLowerableDialectsToLLVMPass()";
@@ -9,3 +9,9 @@ mlir::LogicalResult insertForwardDeclaration(mlir::Operation *op,
                                             mlir::OpBuilder &rewriter,
                                             llvm::StringRef funcName,
                                             mlir::FunctionType funcType);

/// \brief Returns the value of the context argument from the enclosing func
///
/// \param op initial operation to start the search from
/// \return mlir::Value the context value
mlir::Value getContextArgument(mlir::Operation *op);
@@ -6,6 +6,7 @@
#ifndef ZAMALANG_DIALECT_BConcrete_BConcrete_OPS_H
#define ZAMALANG_DIALECT_BConcrete_BConcrete_OPS_H

#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
#include <mlir/IR/Builders.h>
#include <mlir/IR/BuiltinOps.h>
#include <mlir/IR/BuiltinTypes.h>
@@ -5,6 +5,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/IR/BuiltinTypes.td"
include "mlir/Dialect/MemRef/IR/MemRefBase.td"
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"

include "concretelang/Dialect/BConcrete/IR/BConcreteDialect.td"
include "concretelang/Dialect/Concrete/IR/ConcreteTypes.td"
@@ -157,4 +158,27 @@ def BConcrete_AwaitFutureOp :
  let results = (outs 1DTensorOf<[I64]>:$result);
}

def BConcrete_BootstrapLweGPUBufferOp : BConcrete_Op<"bootstrap_lwe_gpu_buffer"> {
  let arguments = (ins
    1DTensorOf<[I64]>:$input_ciphertext,
    1DTensorOf<[I64]>:$table,
    I32:$inputLweDim,
    I32:$polySize,
    I32:$level,
    I32:$baseLog,
    LLVM_PointerTo<I64>:$bsk
  );
  let results = (outs 1DTensorOf<[I64]>:$result);
}

def BConcrete_MoveBskToGPUOp : BConcrete_Op<"move_bsk_to_gpu"> {
  let arguments = (ins);
  let results = (outs LLVM_PointerTo<I64>:$bsk);
}

def BConcrete_FreeBskFromGPUOp : BConcrete_Op<"free_bsk_from_gpu"> {
  let arguments = (ins LLVM_PointerTo<I64>:$bsk);
  let results = (outs);
}

#endif
@@ -52,7 +52,7 @@ def Concrete_NegateLweCiphertextOp : Concrete_Op<"negate_lwe_ciphertext"> {
  let results = (outs Concrete_LweCiphertextType:$result);
}

def Concrete_GlweFromTable : Concrete_Op<"glwe_from_table"> {
def Concrete_GlweFromTable : Concrete_Op<"glwe_from_table", [NoSideEffect]> {
  let summary = "Creates a GLWE ciphertext which is the trivial encryption of the input table interpreted as a polynomial (to use later in a bootstrap)";

  let arguments = (ins 1DTensorOf<[I64]>:$table);
@@ -71,6 +71,35 @@ def Concrete_BootstrapLweOp : Concrete_Op<"bootstrap_lwe"> {
  let results = (outs Concrete_LweCiphertextType:$result);
}

def Concrete_BootstrapLweGPUOp : Concrete_Op<"bootstrap_lwe_gpu"> {
  let summary = "Bootstrap an LWE ciphertext on GPU using a lookup table";

  let arguments = (ins
    Concrete_LweCiphertextType:$input_ciphertext,
    1DTensorOf<[I64]>:$table,
    I32:$inputLweDim,
    I32:$polySize,
    I32:$level,
    I32:$baseLog,
    Concrete_GPUBsk:$bsk
  );
  let results = (outs Concrete_LweCiphertextType:$result);
}

def Concrete_MoveBskToGPUOp : Concrete_Op<"move_bsk_to_gpu"> {
  let summary = "Move bsk to GPU";

  let arguments = (ins);
  let results = (outs Concrete_GPUBsk:$bsk);
}

def Concrete_FreeBskFromGPUOp : Concrete_Op<"free_bsk_from_gpu"> {
  let summary = "Free bsk memory from GPU";

  let arguments = (ins Concrete_GPUBsk:$bsk);
  let results = (outs);
}

def Concrete_KeySwitchLweOp : Concrete_Op<"keyswitch_lwe"> {
  let summary = "Keyswitches an LWE ciphertext";
@@ -93,4 +93,14 @@ def Concrete_Context : Concrete_Type<"Context"> {
  }];
}

def Concrete_GPUBsk : Concrete_Type<"GPUBsk"> {
  let mnemonic = "gpu_bsk";

  let summary = "A bsk in GPU memory";

  let description = [{
    A bootstrapping key in GPU memory
  }];
}

#endif
@@ -105,6 +105,87 @@ void memref_copy_one_rank(uint64_t *src_allocated, uint64_t *src_aligned,
                          uint64_t src_stride, uint64_t *dst_allocated,
                          uint64_t *dst_aligned, uint64_t dst_offset,
                          uint64_t dst_size, uint64_t dst_stride);
}

/// \brief Run bootstrapping on GPU.
///
/// It handles memory copy of the different arguments from CPU to GPU, and
/// freeing memory, except for the bootstrapping key, which should already be in
/// GPU.
///
/// \param out_allocated
/// \param out_aligned
/// \param out_offset
/// \param out_size
/// \param out_stride
/// \param ct0_allocated
/// \param ct0_aligned
/// \param ct0_offset
/// \param ct0_size
/// \param ct0_stride
/// \param tlu_allocated
/// \param tlu_aligned
/// \param tlu_offset
/// \param tlu_size
/// \param tlu_stride
/// \param input_lwe_dim LWE input dimension
/// \param poly_size polynomial size
/// \param level level
/// \param base_log base log
/// \param bsk pointer to bsk on GPU
void memref_bootstrap_lwe_cuda_u64(
    uint64_t *out_allocated, uint64_t *out_aligned, uint64_t out_offset,
    uint64_t out_size, uint64_t out_stride, uint64_t *ct0_allocated,
    uint64_t *ct0_aligned, uint64_t ct0_offset, uint64_t ct0_size,
    uint64_t ct0_stride, uint64_t *tlu_allocated, uint64_t *tlu_aligned,
    uint64_t tlu_offset, uint64_t tlu_size, uint64_t tlu_stride,
    uint32_t input_lwe_dim, uint32_t poly_size, uint32_t level,
    uint32_t base_log, void *bsk);

/// \brief Copy ciphertext from CPU to GPU using a single stream.
///
/// It handles memory allocation on GPU.
///
/// \param ct_allocated
/// \param ct_aligned
/// \param ct_offset
/// \param ct_size
/// \param ct_stride
/// \param gpu_idx index of the GPU to use
/// \return void* pointer to the GPU ciphertext
void *move_ct_to_gpu(uint64_t *ct_allocated, uint64_t *ct_aligned,
                     uint64_t ct_offset, uint64_t ct_size, uint64_t ct_stride,
                     uint32_t gpu_idx);

/// \brief Copy ciphertext from GPU to CPU using a single stream.
///
/// Memory on GPU won't be freed after the copy.
///
/// \param out_allocated
/// \param out_aligned
/// \param out_offset
/// \param out_size
/// \param out_stride
/// \param ct_gpu
/// \param size
/// \param gpu_idx index of the GPU to use
void move_ct_to_cpu(uint64_t *out_allocated, uint64_t *out_aligned,
                    uint64_t out_offset, uint64_t out_size, uint64_t out_stride,
                    void *ct_gpu, size_t size, uint32_t gpu_idx);

/// \brief Copy bootstrapping key from CPU to GPU using a single stream.
///
/// It handles memory allocation on GPU.
///
/// \param context
/// \param gpu_idx index of the GPU to use
/// \return void* pointer to the GPU bsk
void *move_bsk_to_gpu(mlir::concretelang::RuntimeContext *context,
                      uint32_t gpu_idx);

/// \brief Free gpu memory.
///
/// \param gpu_ptr pointer to the GPU memory to free
/// \param gpu_idx index of the GPU to use
void free_from_gpu(void *gpu_ptr, uint32_t gpu_idx);
}
#endif
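
For orientation, here is a minimal sketch of the intended call order of these wrappers: the bsk is moved to the GPU once, each bootstrap call then copies its own ciphertexts, and the key is freed explicitly. The driver function, its parameter list, and the contiguous (offset 0, stride 1) buffer layout are illustrative assumptions, not part of this commit.

#include "concretelang/Runtime/wrappers.h"

// Hypothetical driver, assuming `ctx` holds a loaded bootstrapping key and
// all memref buffers are contiguous.
void bootstrap_once_on_gpu(mlir::concretelang::RuntimeContext *ctx,
                           uint64_t *out, uint64_t out_size, uint64_t *ct0,
                           uint64_t ct0_size, uint64_t *tlu, uint64_t tlu_size,
                           uint32_t input_lwe_dim, uint32_t poly_size,
                           uint32_t level, uint32_t base_log) {
  uint32_t gpu_idx = 0;
  // One-time transfer of the bootstrapping key; allocates on the GPU.
  void *bsk_gpu = move_bsk_to_gpu(ctx, gpu_idx);
  // The wrapper copies ct0 and tlu to the GPU, runs the PBS, and copies the
  // result back into `out`; only the bsk stays resident across calls.
  memref_bootstrap_lwe_cuda_u64(out, out, 0, out_size, 1, ct0, ct0, 0,
                                ct0_size, 1, tlu, tlu, 0, tlu_size, 1,
                                input_lwe_dim, poly_size, level, base_log,
                                bsk_gpu);
  free_from_gpu(bsk_gpu, gpu_idx);
}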
@@ -54,6 +54,8 @@ struct CompilationOptions {
  bool dataflowParallelize;
  bool asyncOffload;
  bool optimizeConcrete;
  /// use GPU during execution by generating GPU operations if possible
  bool useGPU;
  llvm::Optional<std::vector<int64_t>> fhelinalgTileSizes;

  llvm::Optional<std::string> clientParametersFuncName;
@@ -64,7 +66,7 @@ struct CompilationOptions {
      : v0FHEConstraints(llvm::None), verifyDiagnostics(false),
        autoParallelize(false), loopParallelize(false),
        dataflowParallelize(false), asyncOffload(false), optimizeConcrete(true),
        clientParametersFuncName(llvm::None),
        useGPU(false), clientParametersFuncName(llvm::None),
        optimizerConfig(optimizer::DEFAULT_CONFIG){};

  CompilationOptions(std::string funcname) : CompilationOptions() {
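
As a hedged illustration (the surrounding engine setup is assumed, not shown in this diff), the new flag is simply a field to set on CompilationOptions before compiling, mirroring the --use-gpu command-line option added further below:

// Hypothetical usage; CompilationOptions lives in mlir::concretelang.
mlir::concretelang::CompilationOptions options("main");
options.useGPU = true; // triggers the Concrete-to-GPU transform during compilation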
@@ -57,6 +57,10 @@ mlir::LogicalResult asyncOffload(mlir::MLIRContext &context,
                                 mlir::ModuleOp &module,
                                 std::function<bool(mlir::Pass *)> enablePass);

mlir::LogicalResult
transformsConcreteToGPU(mlir::MLIRContext &context, mlir::ModuleOp &module,
                        std::function<bool(mlir::Pass *)> enablePass);

mlir::LogicalResult
lowerBConcreteToStd(mlir::MLIRContext &context, mlir::ModuleOp &module,
                    std::function<bool(mlir::Pass *)> enablePass);
compiler/lib/Conversion/BConcreteToCAPI/BConcreteToCAPI.cpp (new file, 119 lines)
@@ -0,0 +1,119 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
// for license information.

#include <mlir/Dialect/Func/IR/FuncOps.h>
#include <mlir/Pass/Pass.h>
#include <mlir/Transforms/DialectConversion.h>

#include "concretelang/Conversion/Passes.h"
#include "concretelang/Conversion/Tools.h"
#include "concretelang/Dialect/BConcrete/IR/BConcreteDialect.h"
#include "concretelang/Dialect/BConcrete/IR/BConcreteOps.h"

char move_bsk_to_gpu[] = "move_bsk_to_gpu";
char free_from_gpu[] = "free_from_gpu";

/// \brief Rewrites `BConcrete.move_bsk_to_gpu` into a CAPI call to
/// `move_bsk_to_gpu`
///
/// Also inserts the forward declaration of `move_bsk_to_gpu`
struct MoveBskOpPattern : public mlir::OpRewritePattern<
                              mlir::concretelang::BConcrete::MoveBskToGPUOp> {
  MoveBskOpPattern(::mlir::MLIRContext *context,
                   mlir::PatternBenefit benefit = 1)
      : ::mlir::OpRewritePattern<mlir::concretelang::BConcrete::MoveBskToGPUOp>(
            context, benefit) {}

  ::mlir::LogicalResult
  matchAndRewrite(mlir::concretelang::BConcrete::MoveBskToGPUOp moveBskOp,
                  ::mlir::PatternRewriter &rewriter) const override {

    auto ctx = getContextArgument(moveBskOp);

    mlir::SmallVector<mlir::Value> operands{ctx};

    // Insert forward declaration of the function
    auto contextType =
        mlir::concretelang::Concrete::ContextType::get(rewriter.getContext());
    auto funcType = mlir::FunctionType::get(
        rewriter.getContext(), {contextType},
        {mlir::LLVM::LLVMPointerType::get(rewriter.getI64Type())});
    if (insertForwardDeclaration(moveBskOp, rewriter, move_bsk_to_gpu, funcType)
            .failed()) {
      return mlir::failure();
    }

    rewriter.replaceOpWithNewOp<mlir::func::CallOp>(
        moveBskOp, move_bsk_to_gpu, moveBskOp.getResult().getType(), operands);

    return ::mlir::success();
  };
};

/// \brief Rewrites `BConcrete.free_bsk_from_gpu` into a CAPI call to
/// `free_from_gpu`
///
/// Also inserts the forward declaration of `free_from_gpu`
struct FreeBskOpPattern : public mlir::OpRewritePattern<
                              mlir::concretelang::BConcrete::FreeBskFromGPUOp> {
  FreeBskOpPattern(::mlir::MLIRContext *context,
                   mlir::PatternBenefit benefit = 1)
      : ::mlir::OpRewritePattern<
            mlir::concretelang::BConcrete::FreeBskFromGPUOp>(context, benefit) {
  }

  ::mlir::LogicalResult
  matchAndRewrite(mlir::concretelang::BConcrete::FreeBskFromGPUOp freeBskOp,
                  ::mlir::PatternRewriter &rewriter) const override {

    mlir::SmallVector<mlir::Value> operands{freeBskOp.bsk()};

    // Insert forward declaration of the function
    auto funcType = mlir::FunctionType::get(
        rewriter.getContext(),
        {mlir::LLVM::LLVMPointerType::get(rewriter.getI64Type())}, {});
    if (insertForwardDeclaration(freeBskOp, rewriter, free_from_gpu, funcType)
            .failed()) {
      return mlir::failure();
    }

    rewriter.replaceOpWithNewOp<mlir::func::CallOp>(
        freeBskOp, free_from_gpu, mlir::TypeRange({}), operands);

    return ::mlir::success();
  };
};

namespace {
struct BConcreteToCAPIPass : public BConcreteToCAPIBase<BConcreteToCAPIPass> {
  void runOnOperation() final;
};
} // namespace

void BConcreteToCAPIPass::runOnOperation() {
  auto op = this->getOperation();

  mlir::ConversionTarget target(getContext());
  mlir::RewritePatternSet patterns(&getContext());

  target.addIllegalOp<mlir::concretelang::BConcrete::MoveBskToGPUOp>();
  target.addLegalDialect<mlir::func::FuncDialect>();

  patterns.insert<MoveBskOpPattern>(&getContext());
  patterns.insert<FreeBskOpPattern>(&getContext());

  // Apply conversion
  if (mlir::applyPartialConversion(op, target, std::move(patterns)).failed()) {
    this->signalPassFailure();
  }
}

namespace mlir {
namespace concretelang {
std::unique_ptr<OperationPass<ModuleOp>> createConvertBConcreteToCAPIPass() {
  return std::make_unique<BConcreteToCAPIPass>();
}
} // namespace concretelang
} // namespace mlir
compiler/lib/Conversion/BConcreteToCAPI/CMakeLists.txt (new file, 15 lines)
@@ -0,0 +1,15 @@
add_mlir_dialect_library(BConcreteToCAPI
  BConcreteToCAPI.cpp

  ADDITIONAL_HEADER_DIRS
  ${PROJECT_SOURCE_DIR}/include/concretelang/Dialect/BConcrete

  DEPENDS
  BConcreteDialect
  mlir-headers

  LINK_LIBS PUBLIC
  MLIRIR
  MLIRTransforms)

target_link_libraries(BConcreteToCAPI PUBLIC BConcreteDialect MLIRIR)
@@ -3,6 +3,8 @@ add_subdirectory(TFHEGlobalParametrization)
add_subdirectory(TFHEToConcrete)
add_subdirectory(FHETensorOpsToLinalg)
add_subdirectory(ConcreteToBConcrete)
add_subdirectory(ConcreteToGPU)
add_subdirectory(BConcreteToCAPI)
add_subdirectory(MLIRLowerableDialectsToLLVM)
add_subdirectory(LinalgExtras)
@@ -9,6 +9,7 @@
#include <mlir/Dialect/Affine/IR/AffineOps.h>
#include <mlir/Dialect/Bufferization/IR/Bufferization.h>
#include <mlir/Dialect/Func/IR/FuncOps.h>
#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
#include <mlir/Dialect/Linalg/IR/Linalg.h>
#include <mlir/Dialect/SCF/IR/SCF.h>
#include <mlir/Dialect/Tensor/IR/Tensor.h>
@@ -64,6 +65,10 @@ class ConcreteToBConcreteTypeConverter : public mlir::TypeConverter {
public:
  ConcreteToBConcreteTypeConverter() {
    addConversion([](mlir::Type type) { return type; });
    addConversion([&](mlir::concretelang::Concrete::GPUBskType type) {
      return mlir::LLVM::LLVMPointerType::get(
          mlir::IntegerType::get(type.getContext(), 64));
    });
    addConversion([&](mlir::concretelang::Concrete::PlaintextType type) {
      return mlir::IntegerType::get(type.getContext(), 64);
    });
@@ -160,28 +165,34 @@ struct LowToBConcrete : public mlir::OpRewritePattern<ConcreteOp> {
  matchAndRewrite(ConcreteOp concreteOp,
                  ::mlir::PatternRewriter &rewriter) const override {
    ConcreteToBConcreteTypeConverter converter;
    mlir::concretelang::Concrete::LweCiphertextType resultTy =
        ((mlir::Type)concreteOp->getResult(0).getType())
            .cast<mlir::concretelang::Concrete::LweCiphertextType>();
    auto newResultTy =
        converter.convertType(resultTy).cast<mlir::RankedTensorType>();
    mlir::TypeRange resultTyRange = concreteOp->getResultTypes();

    llvm::ArrayRef<::mlir::NamedAttribute> attributes =
        concreteOp.getOperation()->getAttrs();

    auto crt = resultTy.getCrtDecomposition();
    mlir::Operation *bConcreteOp;
    if (crt.empty()) {
      bConcreteOp = rewriter.replaceOpWithNewOp<BConcreteOp>(
          concreteOp, newResultTy, concreteOp.getOperation()->getOperands(),
          attributes);
    if (resultTyRange.size() == 1 &&
        resultTyRange.front()
            .isa<mlir::concretelang::Concrete::LweCiphertextType>()) {
      auto crt = resultTyRange.front()
                     .cast<mlir::concretelang::Concrete::LweCiphertextType>()
                     .getCrtDecomposition();
      if (crt.empty()) {
        bConcreteOp = rewriter.replaceOpWithNewOp<BConcreteOp>(
            concreteOp, resultTyRange, concreteOp.getOperation()->getOperands(),
            attributes);
      } else {
        auto newAttributes = attributes.vec();
        newAttributes.push_back(rewriter.getNamedAttr(
            "crtDecomposition", rewriter.getI64ArrayAttr(crt)));
        bConcreteOp = rewriter.replaceOpWithNewOp<BConcreteCRTOp>(
            concreteOp, resultTyRange, concreteOp.getOperation()->getOperands(),
            newAttributes);
      }
    } else {
      auto newAttributes = attributes.vec();
      newAttributes.push_back(rewriter.getNamedAttr(
          "crtDecomposition", rewriter.getI64ArrayAttr(crt)));
      bConcreteOp = rewriter.replaceOpWithNewOp<BConcreteCRTOp>(
          concreteOp, newResultTy, concreteOp.getOperation()->getOperands(),
          newAttributes);
      bConcreteOp = rewriter.replaceOpWithNewOp<BConcreteOp>(
          concreteOp, resultTyRange, concreteOp.getOperation()->getOperands(),
          attributes);
    }

    mlir::concretelang::convertOperandAndResultTypes(
@@ -906,7 +917,16 @@ void ConcreteToBConcretePass::runOnOperation() {
                     mlir::concretelang::BConcrete::KeySwitchLweBufferOp>,
      LowToBConcrete<mlir::concretelang::Concrete::BootstrapLweOp,
                     mlir::concretelang::BConcrete::BootstrapLweBufferOp,
                     mlir::concretelang::BConcrete::KeySwitchLweBufferOp>,
                     mlir::concretelang::BConcrete::BootstrapLweBufferOp>,
      LowToBConcrete<mlir::concretelang::Concrete::BootstrapLweGPUOp,
                     mlir::concretelang::BConcrete::BootstrapLweGPUBufferOp,
                     mlir::concretelang::BConcrete::BootstrapLweGPUBufferOp>,
      LowToBConcrete<mlir::concretelang::Concrete::MoveBskToGPUOp,
                     mlir::concretelang::BConcrete::MoveBskToGPUOp,
                     mlir::concretelang::BConcrete::MoveBskToGPUOp>,
      LowToBConcrete<mlir::concretelang::Concrete::FreeBskFromGPUOp,
                     mlir::concretelang::BConcrete::FreeBskFromGPUOp,
                     mlir::concretelang::BConcrete::FreeBskFromGPUOp>,
      LowToBConcrete<Concrete::WopPBSLweOp, BConcrete::WopPBSCRTLweBufferOp,
                     BConcrete::WopPBSCRTLweBufferOp>>(&getContext());
compiler/lib/Conversion/ConcreteToGPU/CMakeLists.txt (new file, 16 lines)
@@ -0,0 +1,16 @@
add_mlir_dialect_library(ConcreteToGPU
  ConcreteToGPU.cpp

  ADDITIONAL_HEADER_DIRS
  ${PROJECT_SOURCE_DIR}/include/concretelang/Dialect/Concrete

  DEPENDS
  ConcreteDialect
  mlir-headers

  LINK_LIBS PUBLIC
  MLIRIR
  MLIRTransforms
)

target_link_libraries(ConcreteToGPU PUBLIC ConcreteDialect MLIRIR)
compiler/lib/Conversion/ConcreteToGPU/ConcreteToGPU.cpp (new file, 108 lines)
@@ -0,0 +1,108 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
// for license information.

#include <mlir/Pass/Pass.h>
#include <mlir/Transforms/DialectConversion.h>

#include "concretelang/Conversion/Passes.h"
#include "concretelang/Dialect/Concrete/IR/ConcreteDialect.h"
#include "concretelang/Dialect/Concrete/IR/ConcreteOps.h"
#include "concretelang/Dialect/Concrete/IR/ConcreteTypes.h"

/// This rewrite pattern transforms any instance of `Concrete.bootstrap_lwe`
/// into `Concrete.bootstrap_lwe_gpu`. It also inserts operations to allocate
/// memory, copy bsk into GPU, and free memory after bootstrapping.
struct BstOpPattern : public mlir::OpRewritePattern<
                          mlir::concretelang::Concrete::BootstrapLweOp> {
  BstOpPattern(::mlir::MLIRContext *context, mlir::PatternBenefit benefit = 1)
      : ::mlir::OpRewritePattern<mlir::concretelang::Concrete::BootstrapLweOp>(
            context, benefit) {}

  ::mlir::LogicalResult
  matchAndRewrite(mlir::concretelang::Concrete::BootstrapLweOp bstOp,
                  ::mlir::PatternRewriter &rewriter) const override {

    auto baselog = bstOp.baseLog();
    auto level = bstOp.level();
    mlir::Value ct = bstOp.input_ciphertext();

    auto ctType =
        ct.getType().cast<mlir::concretelang::Concrete::LweCiphertextType>();
    auto inputLweDim = ctType.getDimension();

    auto outType = bstOp.getResult()
                       .getType()
                       .cast<mlir::concretelang::Concrete::LweCiphertextType>();
    auto outputLweDim = outType.getDimension();

    // copy bsk into GPU
    mlir::Value bskGPU =
        rewriter
            .create<mlir::concretelang::Concrete::MoveBskToGPUOp>(
                bstOp.getLoc(), mlir::concretelang::Concrete::GPUBskType::get(
                                    rewriter.getContext()))
            .getResult();

    mlir::Value inputLweDimCst = rewriter.create<mlir::arith::ConstantIntOp>(
        bstOp.getLoc(), inputLweDim, 32);
    mlir::Value polySizeCst = rewriter.create<mlir::arith::ConstantIntOp>(
        bstOp.getLoc(), outputLweDim, 32);
    mlir::Value levelCst =
        rewriter.create<mlir::arith::ConstantIntOp>(bstOp.getLoc(), level, 32);
    mlir::Value baselogCst = rewriter.create<mlir::arith::ConstantIntOp>(
        bstOp.getLoc(), baselog, 32);

    mlir::Type tableType =
        mlir::RankedTensorType::get({4}, rewriter.getI64Type());
    mlir::Value tableCst = rewriter.create<mlir::arith::ConstantOp>(
        bstOp.getLoc(),
        mlir::DenseIntElementsAttr::get(
            tableType, {llvm::APInt(64, 0), llvm::APInt(64, 0),
                        llvm::APInt(64, 0), llvm::APInt(64, 0)}));

    rewriter
        .replaceOpWithNewOp<mlir::concretelang::Concrete::BootstrapLweGPUOp>(
            bstOp, outType, ct, tableCst, inputLweDimCst, polySizeCst, levelCst,
            baselogCst, bskGPU);

    // free bsk memory from GPU
    rewriter.create<mlir::concretelang::Concrete::FreeBskFromGPUOp>(
        bstOp.getLoc(), bskGPU);

    return ::mlir::success();
  };
};

namespace {
struct ConcreteToGPUPass : public ConcreteToGPUBase<ConcreteToGPUPass> {
  void runOnOperation() final;
};
} // namespace

void ConcreteToGPUPass::runOnOperation() {
  auto op = this->getOperation();

  mlir::ConversionTarget target(getContext());
  mlir::RewritePatternSet patterns(&getContext());

  target.addLegalDialect<mlir::concretelang::Concrete::ConcreteDialect,
                         mlir::arith::ArithmeticDialect>();
  target.addIllegalOp<mlir::concretelang::Concrete::BootstrapLweOp>();

  patterns.insert<BstOpPattern>(&getContext());

  // Apply conversion
  if (mlir::applyPartialConversion(op, target, std::move(patterns)).failed()) {
    this->signalPassFailure();
  }
}

namespace mlir {
namespace concretelang {
std::unique_ptr<OperationPass<ModuleOp>> createConvertConcreteToGPUPass() {
  return std::make_unique<ConcreteToGPUPass>();
}
} // namespace concretelang
} // namespace mlir
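
For context, a minimal sketch (not part of this commit) of running the new pass in isolation, e.g. from a test driver; `context` and `module` are assumed to be created elsewhere. The pipeline helper transformsConcreteToGPU added further below does essentially the same thing, with pass filtering on top.

#include <mlir/Pass/PassManager.h>
#include "concretelang/Conversion/ConcreteToGPU/Pass.h"

// Hypothetical helper: schedule only the Concrete-to-GPU conversion.
mlir::LogicalResult runConcreteToGPU(mlir::MLIRContext &context,
                                     mlir::ModuleOp module) {
  mlir::PassManager pm(&context);
  pm.addPass(mlir::concretelang::createConvertConcreteToGPUPass());
  return pm.run(module.getOperation());
}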
@@ -6,6 +6,7 @@
#include "mlir/Dialect/Func/IR/FuncOps.h"

#include "concretelang/Conversion/Tools.h"
#include "concretelang/Dialect/Concrete/IR/ConcreteTypes.h"

mlir::LogicalResult insertForwardDeclaration(mlir::Operation *op,
                                             mlir::OpBuilder &rewriter,
@@ -35,3 +36,27 @@ mlir::LogicalResult insertForwardDeclaration(mlir::Operation *op,
      mlir::SymbolTable::lookupSymbolIn(module, funcName)));
  return mlir::success();
}

/// Returns the value of the context argument from the enclosing func
mlir::Value getContextArgument(mlir::Operation *op) {
  mlir::Block *block = op->getBlock();
  while (block != nullptr) {
    if (llvm::isa<mlir::func::FuncOp>(block->getParentOp())) {

      auto context = std::find_if(
          block->getArguments().rbegin(), block->getArguments().rend(),
          [](mlir::BlockArgument &arg) {
            return arg.getType()
                .isa<mlir::concretelang::Concrete::ContextType>();
          });

      assert(context != block->getArguments().rend() &&
             "Cannot find the Concrete.context");

      return *context;
    }
    block = block->getParentOp()->getBlock();
  }
  // assert(false && ...) so the check actually fires; a bare string literal
  // is always true and would never trigger the assert.
  assert(false && "cannot find a function that encloses the op");
  return nullptr;
}
@@ -7,6 +7,7 @@
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
@@ -75,6 +76,7 @@ char memref_bootstrap_lwe_u64[] = "memref_bootstrap_lwe_u64";
char memref_keyswitch_async_lwe_u64[] = "memref_keyswitch_async_lwe_u64";
char memref_bootstrap_async_lwe_u64[] = "memref_bootstrap_async_lwe_u64";
char memref_await_future[] = "memref_await_future";
char memref_bootstrap_lwe_cuda_u64[] = "memref_bootstrap_lwe_cuda_u64";
char memref_expand_lut_in_trivial_glwe_ct_u64[] =
    "memref_expand_lut_in_trivial_glwe_ct_u64";
@@ -89,6 +91,9 @@ mlir::LogicalResult insertForwardDeclarationOfTheCAPI(
      mlir::concretelang::RT::FutureType::get(rewriter.getIndexType());
  auto contextType =
      mlir::concretelang::Concrete::ContextType::get(rewriter.getContext());
  auto i32Type = rewriter.getI32Type();
  auto i64PointerType = mlir::LLVM::LLVMPointerType::get(rewriter.getI64Type());

  mlir::FunctionType funcType;

  if (funcName == memref_add_lwe_ciphertexts_u64) {
@@ -124,6 +129,12 @@ mlir::LogicalResult insertForwardDeclarationOfTheCAPI(
    funcType = mlir::FunctionType::get(
        rewriter.getContext(),
        {memref1DType, futureType, memref1DType, memref1DType}, {});
  } else if (funcName == memref_bootstrap_lwe_cuda_u64) {
    funcType = mlir::FunctionType::get(rewriter.getContext(),
                                       {memref1DType, memref1DType,
                                        memref1DType, i32Type, i32Type, i32Type,
                                        i32Type, i64PointerType},
                                       {});
  } else if (funcName == memref_expand_lut_in_trivial_glwe_ct_u64) {
    funcType = mlir::FunctionType::get(rewriter.getContext(),
                                       {
@@ -156,32 +167,6 @@ mlir::LogicalResult insertForwardDeclarationOfTheCAPI(
  return insertForwardDeclaration(op, rewriter, funcName, funcType);
}

/// Returns the value of the context argument from the enclosing func
mlir::Value getContextArgument(mlir::Operation *op) {
  mlir::Block *block = op->getBlock();
  while (block != nullptr) {
    if (llvm::isa<mlir::func::FuncOp>(block->getParentOp())) {
      block = &mlir::cast<mlir::func::FuncOp>(block->getParentOp())
                   .getBody()
                   .front();

      auto context =
          std::find_if(block->getArguments().rbegin(),
                       block->getArguments().rend(), [](BlockArgument &arg) {
                         return arg.getType()
                             .isa<mlir::concretelang::Concrete::ContextType>();
                       });
      assert(context != block->getArguments().rend() &&
             "Cannot find the Concrete.context");

      return *context;
    }
    block = block->getParentOp()->getBlock();
  }
  assert("can't find a function that enclose the op");
  return nullptr;
};

template <typename Op>
void pushAdditionalArgs(Op op, mlir::SmallVector<mlir::Value> &operands,
                        RewriterBase &rewriter);
@@ -578,6 +563,10 @@ void mlir::concretelang::BConcrete::
      BufferizableWithCallOpInterface<BConcrete::NegateLweBufferOp,
                                      memref_negate_lwe_ciphertext_u64>>(
      *ctx);
  BConcrete::BootstrapLweGPUBufferOp::attachInterface<
      BufferizableWithCallOpInterface<BConcrete::BootstrapLweGPUBufferOp,
                                      memref_bootstrap_lwe_cuda_u64, false>>(
      *ctx);
  BConcrete::KeySwitchLweBufferOp::attachInterface<
      BufferizableWithCallOpInterface<BConcrete::KeySwitchLweBufferOp,
                                      memref_keyswitch_lwe_u64>>(*ctx);
@@ -18,6 +18,16 @@ if(CONCRETELANG_DATAFLOW_EXECUTION_ENABLED)
  )
endif()

if(CONCRETELANG_CUDA_SUPPORT)
  target_link_libraries(
    ConcretelangRuntime
    PRIVATE
    ConcreteCUDA
    -L/usr/local/cuda/lib64
    cudart
  )
endif()

if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
  target_link_libraries(ConcretelangRuntime PUBLIC omp)
else()
@@ -57,6 +57,122 @@ void encode_and_expand_lut(uint64_t *output, size_t output_size,
#include "concretelang/ClientLib/CRT.h"
#include "concretelang/Runtime/wrappers.h"

#ifdef CONCRETELANG_CUDA_SUPPORT

// We need to define the double2 struct from the CUDA backend header files
// This shouldn't be defined here, but included along with concrete-cuda header
// files
typedef struct double2 {
  double x, y;
} double2;

#include "bootstrap.h"
#include "device.h"

void memref_keyswitch_lwe_cuda_u64(uint64_t *out_allocated,
                                   uint64_t *out_aligned, uint64_t out_offset,
                                   uint64_t out_size, uint64_t out_stride,
                                   uint64_t *ct0_allocated,
                                   uint64_t *ct0_aligned, uint64_t ct0_offset,
                                   uint64_t ct0_size, uint64_t ct0_stride,
                                   void *ksk_gpu) {
  // TODO: GPU implementation
}

void *move_ct_to_gpu(uint64_t *ct_allocated, uint64_t *ct_aligned,
                     uint64_t ct_offset, uint64_t ct_size, uint64_t ct_stride,
                     uint32_t gpu_idx) {
  void *stream = cuda_create_stream(gpu_idx);
  void *ct_gpu = cuda_malloc(ct_size * sizeof(uint64_t), gpu_idx);
  cuda_memcpy_async_to_gpu(ct_gpu, ct_aligned + ct_offset,
                           ct_size * sizeof(uint64_t), stream, gpu_idx);
  cuda_synchronize_device(gpu_idx);
  cuda_destroy_stream(stream, gpu_idx);
  return ct_gpu;
}

void *move_bsk_to_gpu(mlir::concretelang::RuntimeContext *context,
                      uint32_t gpu_idx = 0) {
  void *stream = cuda_create_stream(gpu_idx);
  LweBootstrapKey_u64 *bsk = get_bootstrap_key_u64(context);
  BufferView bskBuffer = bootstrap_buffer_lwe_u64(bsk);
  void *bsk_gpu = cuda_malloc(bskBuffer.length, gpu_idx);
  cuda_memcpy_async_to_gpu(bsk_gpu, (void *)bskBuffer.pointer, bskBuffer.length,
                           stream, gpu_idx);
  cuda_synchronize_device(gpu_idx);
  cuda_destroy_stream(stream, gpu_idx);
  return bsk_gpu;
}

void move_ct_to_cpu(uint64_t *out_allocated, uint64_t *out_aligned,
                    uint64_t out_offset, uint64_t out_size, uint64_t out_stride,
                    void *ct_gpu, size_t size, uint32_t gpu_idx) {
  void *stream = cuda_create_stream(gpu_idx);
  cuda_memcpy_async_to_cpu(out_aligned + out_offset, ct_gpu,
                           size * sizeof(uint64_t), stream, gpu_idx);
  cuda_synchronize_device(gpu_idx);
  cuda_destroy_stream(stream, gpu_idx);
}

void free_from_gpu(void *gpu_ptr, uint32_t gpu_idx = 0) {
  cuda_drop(gpu_ptr, gpu_idx);
}

void memref_bootstrap_lwe_cuda_u64(
    uint64_t *out_allocated, uint64_t *out_aligned, uint64_t out_offset,
    uint64_t out_size, uint64_t out_stride, uint64_t *ct0_allocated,
    uint64_t *ct0_aligned, uint64_t ct0_offset, uint64_t ct0_size,
    uint64_t ct0_stride, uint64_t *tlu_allocated, uint64_t *tlu_aligned,
    uint64_t tlu_offset, uint64_t tlu_size, uint64_t tlu_stride,
    uint32_t input_lwe_dim, uint32_t poly_size, uint32_t level,
    uint32_t base_log, void *bsk_gpu) {

  uint32_t gpu_idx = 0;
  void *stream = cuda_create_stream(gpu_idx);

  // move input ciphertext into gpu
  void *ct0_gpu = move_ct_to_gpu(ct0_allocated, ct0_aligned, ct0_offset,
                                 ct0_size, ct0_stride, gpu_idx);
  // move output ciphertext into gpu
  void *out_gpu = move_ct_to_gpu(out_allocated, out_aligned, out_offset,
                                 out_size, out_stride, gpu_idx);
  // hardcoded values
  uint32_t num_samples = 1, num_test_vectors = 1, lwe_idx = 0;
  void *test_vector_idxes = malloc(num_samples * sizeof(uint32_t));
  ((uint32_t *)test_vector_idxes)[0] = 0;
  void *test_vector = malloc(poly_size * sizeof(uint64_t));
  for (size_t i = 0; i < poly_size; i++) {
    ((uint64_t *)test_vector)[i] = (uint64_t)1 << 61;
  }
  // move test vector into gpu
  void *test_vector_gpu = cuda_malloc(poly_size * sizeof(uint64_t), gpu_idx);
  cuda_memcpy_async_to_gpu(test_vector_gpu, test_vector,
                           poly_size * sizeof(uint64_t), stream, gpu_idx);
  // move test vector indexes into gpu
  void *test_vector_idxes_gpu =
      cuda_malloc(num_samples * sizeof(uint32_t), gpu_idx);
  cuda_memcpy_async_to_gpu(test_vector_idxes_gpu, test_vector_idxes,
                           num_samples * sizeof(uint32_t), stream, gpu_idx);
  // run gpu bootstrap
  cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
      stream, out_gpu, test_vector_gpu, test_vector_idxes_gpu, ct0_gpu, bsk_gpu,
      input_lwe_dim, poly_size, base_log, level, num_samples, num_test_vectors,
      lwe_idx, cuda_get_max_shared_memory(gpu_idx));
  // copy output ciphertext back to cpu
  move_ct_to_cpu(out_allocated, out_aligned, out_offset, out_size, out_stride,
                 out_gpu, out_size, gpu_idx);
  cuda_synchronize_device(gpu_idx);
  // free memory that we allocated on gpu
  cuda_drop(ct0_gpu, gpu_idx);
  cuda_drop(out_gpu, gpu_idx);
  cuda_drop(test_vector_gpu, gpu_idx);
  cuda_drop(test_vector_idxes_gpu, gpu_idx);
  // free the host-side staging buffers as well (they were leaked otherwise)
  free(test_vector);
  free(test_vector_idxes);

  cuda_destroy_stream(stream, gpu_idx);
}

#endif

void memref_expand_lut_in_trivial_glwe_ct_u64(
    uint64_t *glwe_ct_allocated, uint64_t *glwe_ct_aligned,
    uint64_t glwe_ct_offset, uint64_t glwe_ct_size, uint64_t glwe_ct_stride,
@@ -317,6 +317,14 @@ CompilerEngine::compile(llvm::SourceMgr &sm, Target target, OptionalLib lib) {
    return errorDiag("Optimizing Concrete failed");
  }

  // Transforming into GPU
  if (this->compilerOptions.useGPU &&
      mlir::concretelang::pipeline::transformsConcreteToGPU(mlirContext, module,
                                                            this->enablePass)
          .failed()) {
    return errorDiag("Transforming Concrete to GPU failed");
  }

  if (target == Target::CONCRETE)
    return std::move(res);
@@ -239,6 +239,16 @@ optimizeConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
  return pm.run(module.getOperation());
}

mlir::LogicalResult
transformsConcreteToGPU(mlir::MLIRContext &context, mlir::ModuleOp &module,
                        std::function<bool(mlir::Pass *)> enablePass) {
  mlir::PassManager pm(&context);
  pipelinePrinting("ConcreteToGPU", pm, context);
  addPotentiallyNestedPass(
      pm, mlir::concretelang::createConvertConcreteToGPUPass(), enablePass);
  return pm.run(module.getOperation());
}

mlir::LogicalResult
lowerConcreteToBConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
                         std::function<bool(mlir::Pass *)> enablePass,
@@ -283,6 +293,8 @@ lowerBConcreteToStd(mlir::MLIRContext &context, mlir::ModuleOp &module,
                           enablePass);
  addPotentiallyNestedPass(pm, mlir::concretelang::createAddRuntimeContext(),
                           enablePass);
  addPotentiallyNestedPass(
      pm, mlir::concretelang::createConvertBConcreteToCAPIPass(), enablePass);
  return pm.run(module.getOperation());
}
@@ -98,6 +98,12 @@ llvm::cl::opt<bool>
                   "dialects. (Enabled by default)"),
    llvm::cl::init<bool>(true));

llvm::cl::opt<bool>
    useGPU("use-gpu",
           llvm::cl::desc("enable/disable generating concrete GPU "
                          "operations (Disabled by default)"),
           llvm::cl::init<bool>(false));

llvm::cl::list<std::string> passes(
    "passes",
    llvm::cl::desc("Specify the passes to run (use only for compiler tests)"),
@@ -283,6 +289,7 @@ cmdlineCompilationOptions() {
  options.loopParallelize = cmdline::loopParallelize;
  options.dataflowParallelize = cmdline::dataflowParallelize;
  options.optimizeConcrete = cmdline::optimizeConcrete;
  options.useGPU = cmdline::useGPU;

  if (!cmdline::v0Constraint.empty()) {
    if (cmdline::v0Constraint.size() != 2) {