test: add GPU end2end tests

This commit is contained in:
youben11
2022-09-22 07:49:21 +01:00
committed by Ayoub Benaissa
parent 2383c5aec3
commit 7cd45d1514
17 changed files with 169 additions and 31 deletions

View File

@@ -231,6 +231,14 @@ run-end-to-end-dataflow-tests: build-end-to-end-dataflow-tests
$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_jit_auto_parallelization
$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_jit_distributed
## GPU tests
# Build the GPU end-to-end test binary (requires an initialized build tree).
build-end-to-end-gpu-tests: build-initialized
cmake --build $(BUILD_DIR) --target end_to_end_gpu_test
# Run the GPU end-to-end tests, building them first if needed.
run-end-to-end-gpu-tests: build-end-to-end-gpu-tests
$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_gpu_test
# benchmark
build-benchmarks: build-initialized

View File

@@ -12,7 +12,7 @@ namespace mlir {
namespace concretelang {
/// Create a pass to convert `Concrete` dialect to `BConcrete` dialect.
std::unique_ptr<OperationPass<ModuleOp>>
createConvertConcreteToBConcretePass(bool loopParallelize, bool useGPU);
createConvertConcreteToBConcretePass(bool loopParallelize, bool emitGPUOps);
} // namespace concretelang
} // namespace mlir

View File

@@ -155,7 +155,7 @@ def BConcrete_AwaitFutureOp :
// This is a different op in BConcrete just because of the way we are lowering to CAPI
// When the CAPI lowering is detached from bufferization, we can remove this op, and lower
// to the appropriate CAPI (gpu or cpu) depending on the useGPU compilation option
// to the appropriate CAPI (gpu or cpu) depending on the emitGPUOps compilation option
def BConcrete_BootstrapLweGPUBufferOp : BConcrete_Op<"bootstrap_lwe_gpu_buffer"> {
let arguments = (ins
1DTensorOf<[I64]>:$input_ciphertext,
@@ -172,7 +172,7 @@ def BConcrete_BootstrapLweGPUBufferOp : BConcrete_Op<"bootstrap_lwe_gpu_buffer">
// This is a different op in BConcrete just because of the way we are lowering to CAPI
// When the CAPI lowering is detached from bufferization, we can remove this op, and lower
// to the appropriate CAPI (gpu or cpu) depending on the useGPU compilation option
// to the appropriate CAPI (gpu or cpu) depending on the emitGPUOps compilation option
def BConcrete_KeySwitchLweGPUBufferOp : BConcrete_Op<"keyswitch_lwe_gpu_buffer"> {
let arguments = (ins
1DTensorOf<[I64]>:$ciphertext,

View File

@@ -55,7 +55,7 @@ struct CompilationOptions {
bool asyncOffload;
bool optimizeConcrete;
/// use GPU during execution by generating GPU operations if possible
bool useGPU;
bool emitGPUOps;
llvm::Optional<std::vector<int64_t>> fhelinalgTileSizes;
llvm::Optional<std::string> clientParametersFuncName;
@@ -66,7 +66,7 @@ struct CompilationOptions {
: v0FHEConstraints(llvm::None), verifyDiagnostics(false),
autoParallelize(false), loopParallelize(false),
dataflowParallelize(false), asyncOffload(false), optimizeConcrete(true),
useGPU(false), clientParametersFuncName(llvm::None),
emitGPUOps(false), clientParametersFuncName(llvm::None),
optimizerConfig(optimizer::DEFAULT_CONFIG){};
CompilationOptions(std::string funcname) : CompilationOptions() {

View File

@@ -47,7 +47,7 @@ lowerTFHEToConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
mlir::LogicalResult
lowerConcreteToBConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
std::function<bool(mlir::Pass *)> enablePass,
bool parallelizeLoops, bool useGPU);
bool parallelizeLoops, bool emitGPUOps);
mlir::LogicalResult
optimizeConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,

View File

@@ -48,12 +48,12 @@ struct ConcreteToBConcretePass
: public ConcreteToBConcreteBase<ConcreteToBConcretePass> {
void runOnOperation() final;
ConcreteToBConcretePass() = delete;
ConcreteToBConcretePass(bool loopParallelize, bool useGPU)
: loopParallelize(loopParallelize), useGPU(useGPU){};
ConcreteToBConcretePass(bool loopParallelize, bool emitGPUOps)
: loopParallelize(loopParallelize), emitGPUOps(emitGPUOps){};
private:
bool loopParallelize;
bool useGPU;
bool emitGPUOps;
};
} // namespace
@@ -919,7 +919,7 @@ void ConcreteToBConcretePass::runOnOperation() {
LowToBConcrete<Concrete::WopPBSLweOp, BConcrete::WopPBSCRTLweBufferOp,
BConcrete::WopPBSCRTLweBufferOp>>(&getContext());
if (this->useGPU) {
if (this->emitGPUOps) {
patterns
.insert<LowToBConcrete<
mlir::concretelang::Concrete::BootstrapLweOp,
@@ -1063,8 +1063,8 @@ void ConcreteToBConcretePass::runOnOperation() {
namespace mlir {
namespace concretelang {
std::unique_ptr<OperationPass<ModuleOp>>
createConvertConcreteToBConcretePass(bool loopParallelize, bool useGPU) {
return std::make_unique<ConcreteToBConcretePass>(loopParallelize, useGPU);
createConvertConcreteToBConcretePass(bool loopParallelize, bool emitGPUOps) {
return std::make_unique<ConcreteToBConcretePass>(loopParallelize, emitGPUOps);
}
} // namespace concretelang
} // namespace mlir

View File

@@ -356,7 +356,7 @@ CompilerEngine::compile(llvm::SourceMgr &sm, Target target, OptionalLib lib) {
// Concrete -> BConcrete
if (mlir::concretelang::pipeline::lowerConcreteToBConcrete(
mlirContext, module, this->enablePass, loopParallelize,
options.useGPU)
options.emitGPUOps)
.failed()) {
return StreamStringError(
"Lowering from Concrete to Bufferized Concrete failed");

View File

@@ -242,13 +242,13 @@ optimizeConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
mlir::LogicalResult
lowerConcreteToBConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
std::function<bool(mlir::Pass *)> enablePass,
bool parallelizeLoops, bool useGPU) {
bool parallelizeLoops, bool emitGPUOps) {
mlir::PassManager pm(&context);
pipelinePrinting("ConcreteToBConcrete", pm, context);
std::unique_ptr<Pass> conversionPass =
mlir::concretelang::createConvertConcreteToBConcretePass(parallelizeLoops,
useGPU);
emitGPUOps);
bool passEnabled = enablePass(conversionPass.get());

View File

@@ -98,8 +98,8 @@ llvm::cl::opt<bool>
"dialects. (Enabled by default)"),
llvm::cl::init<bool>(true));
llvm::cl::opt<bool> useGPU(
"use-gpu",
llvm::cl::opt<bool> emitGPUOps(
"emit-gpu-ops",
llvm::cl::desc(
"enable/disable generating GPU operations (Disabled by default)"),
llvm::cl::init<bool>(false));
@@ -289,7 +289,7 @@ cmdlineCompilationOptions() {
options.loopParallelize = cmdline::loopParallelize;
options.dataflowParallelize = cmdline::dataflowParallelize;
options.optimizeConcrete = cmdline::optimizeConcrete;
options.useGPU = cmdline::useGPU;
options.emitGPUOps = cmdline::emitGPUOps;
if (!cmdline::v0Constraint.empty()) {
if (cmdline::v0Constraint.size() != 2) {

View File

@@ -1,4 +1,4 @@
// RUN: concretecompiler --passes concrete-to-bconcrete --action=dump-bconcrete --use-gpu %s 2>&1| FileCheck %s
// RUN: concretecompiler --passes concrete-to-bconcrete --action=dump-bconcrete --emit-gpu-ops %s 2>&1| FileCheck %s
//CHECK: func.func @main(%arg0: tensor<1025xi64>) -> tensor<1025xi64> {

View File

@@ -137,7 +137,7 @@ static int registerEndToEndTestFromFile(std::string prefix, std::string path,
registe("loop", loop);
#ifdef CONCRETELANG_CUDA_SUPPORT
mlir::concretelang::CompilationOptions gpu;
gpu.useGPU = true;
gpu.emitGPUOps = true;
registe("gpu", gpu);
#endif
// mlir::concretelang::CompilationOptions dataflow;

View File

@@ -138,7 +138,7 @@ static int registerEndToEndTestFromFile(std::string prefix, std::string path,
registe("loop", loop);
#ifdef CONCRETELANG_CUDA_SUPPORT
mlir::concretelang::CompilationOptions gpu;
gpu.useGPU = true;
gpu.emitGPUOps = true;
registe("gpu", gpu);
#endif
#ifdef CONCRETELANG_DATAFLOW_EXECUTION_ENABLED

View File

@@ -70,3 +70,11 @@ if(CONCRETELANG_DATAFLOW_EXECUTION_ENABLED)
globals.cc
)
endif()
# GPU end-to-end tests: only registered when the toolchain was configured
# with CUDA support, since the generated code exercises GPU operations.
if(CONCRETELANG_CUDA_SUPPORT)
add_concretecompiler_unittest(
end_to_end_gpu_test
end_to_end_gpu_test.cc
globals.cc
)
endif()

View File

@@ -0,0 +1,55 @@
#include <cstdint>
#include <gtest/gtest.h>
#include <type_traits>
#include "end_to_end_gpu_test.h"
#include "tests_tools/GtestEnvironment.h"
// GPU end-to-end test of a 2-bit programmable bootstrap: the 4-entry
// table maps i -> (i + 1) mod 4, and all four possible inputs are
// checked exhaustively.
TEST(GPULookupTable, lut_precision2) {
// `true` requests the default FHE constraints (see internalCheckedJit).
checkedJit(lambda, R"XXX(
func.func @main(%arg0: !FHE.eint<2>) -> !FHE.eint<2> {
%arg1 = arith.constant dense<[1, 2, 3, 0]> : tensor<4xi64>
%1 = "FHE.apply_lookup_table"(%arg0, %arg1): (!FHE.eint<2>, tensor<4xi64>) -> (!FHE.eint<2>)
return %1: !FHE.eint<2>
}
)XXX",
"main", true);
ASSERT_EXPECTED_VALUE(lambda(0_u64), (uint64_t)1);
ASSERT_EXPECTED_VALUE(lambda(1_u64), (uint64_t)2);
ASSERT_EXPECTED_VALUE(lambda(2_u64), (uint64_t)3);
ASSERT_EXPECTED_VALUE(lambda(3_u64), (uint64_t)0);
}
// GPU end-to-end test of a 4-bit programmable bootstrap: the 16-entry
// table ramps up then down (i < 8 -> i + 1, i >= 8 -> 15 - i).
// Spot-checks both ends and the peak of the ramp.
TEST(GPULookupTable, lut_precision4) {
// `true` requests the default FHE constraints (see internalCheckedJit).
checkedJit(lambda, R"XXX(
func.func @main(%arg0: !FHE.eint<4>) -> !FHE.eint<4> {
%arg1 = arith.constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3, 2, 1, 0]> : tensor<16xi64>
%1 = "FHE.apply_lookup_table"(%arg0, %arg1): (!FHE.eint<4>, tensor<16xi64>) -> (!FHE.eint<4>)
return %1: !FHE.eint<4>
}
)XXX",
"main", true);
ASSERT_EXPECTED_VALUE(lambda(0_u64), (uint64_t)1);
ASSERT_EXPECTED_VALUE(lambda(1_u64), (uint64_t)2);
ASSERT_EXPECTED_VALUE(lambda(7_u64), (uint64_t)8);
ASSERT_EXPECTED_VALUE(lambda(15_u64), (uint64_t)0);
}
// GPU end-to-end test of a 7-bit programmable bootstrap: the 128-entry
// table maps i -> (i + 1) mod 128. Spot-checks the low end, middle and
// the wrap-around at 127.
TEST(GPULookupTable, lut_precision7) {
// `true` requests the default FHE constraints (see internalCheckedJit).
checkedJit(lambda, R"XXX(
func.func @main(%arg0: !FHE.eint<7>) -> !FHE.eint<7> {
%arg1 = arith.constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0]> : tensor<128xi64>
%1 = "FHE.apply_lookup_table"(%arg0, %arg1): (!FHE.eint<7>, tensor<128xi64>) -> (!FHE.eint<7>)
return %1: !FHE.eint<7>
}
)XXX",
"main", true);
ASSERT_EXPECTED_VALUE(lambda(0_u64), (uint64_t)1);
ASSERT_EXPECTED_VALUE(lambda(1_u64), (uint64_t)2);
ASSERT_EXPECTED_VALUE(lambda(120_u64), (uint64_t)121);
ASSERT_EXPECTED_VALUE(lambda(127_u64), (uint64_t)0);
}

View File

@@ -0,0 +1,60 @@
#ifndef END_TO_END_GPU_TEST_H
#define END_TO_END_GPU_TEST_H
#include <gtest/gtest.h>
#include "../tests_tools/keySetCache.h"
#include "concretelang/Support/CompilerEngine.h"
#include "concretelang/Support/JITSupport.h"
#include "end_to_end_test.h"
#include "globals.h"
#include "tests_tools/assert.h"
// Jit-compiles the function specified by `func` from `src` and
// returns the corresponding lambda. Any compilation errors are caught
// and result in abnormal termination.
//
// GPU variant: unconditionally enables GPU op emission
// (`options.emitGPUOps`) so the resulting lambda exercises the GPU
// lowering paths.
inline llvm::Expected<
    mlir::concretelang::ClientServer<mlir::concretelang::JITSupport>>
internalCheckedJit(llvm::StringRef src, llvm::StringRef func = "main",
                   bool useDefaultFHEConstraints = false,
                   bool dataflowParallelize = false,
                   bool loopParallelize = false) {
  // llvm::StringRef is not guaranteed to be null-terminated, so build
  // the std::string from (pointer, length) via str() rather than from
  // the raw data() pointer.
  auto options = mlir::concretelang::CompilationOptions(func.str());
  // These are the GPU end-to-end tests: always emit GPU operations.
  options.emitGPUOps = true;
  if (useDefaultFHEConstraints) {
    options.v0FHEConstraints = defaultV0Constraints;
    options.optimizerConfig.strategy_v0 = true;
  }
  // Honor the caller's loop-parallelism request (may be overridden
  // below when dataflow testing is enabled).
  options.loopParallelize = loopParallelize;

#ifdef CONCRETELANG_DATAFLOW_EXECUTION_ENABLED
#ifdef CONCRETELANG_DATAFLOW_TESTING_ENABLED
  // Dataflow testing builds force both parallelization modes on.
  options.dataflowParallelize = true;
  options.loopParallelize = true;
#else
  options.dataflowParallelize = dataflowParallelize;
#endif
#endif

  auto lambdaOrErr =
      mlir::concretelang::ClientServer<mlir::concretelang::JITSupport>::create(
          src, options, getTestKeySetCache(), mlir::concretelang::JITSupport());
  return lambdaOrErr;
}
// Wrapper around `internalCheckedJit` that causes
// `ASSERT_EXPECTED_SUCCESS` to use the file and line number of the
// caller instead of `internalCheckedJit`.
// VARNAME is token-pasted into the temporary's identifier so the macro
// can be expanded more than once in the same scope without the
// temporaries colliding.
#define checkedJit(VARNAME, ...)                                               \
  auto VARNAME##OrErr = internalCheckedJit(__VA_ARGS__);                       \
  ASSERT_EXPECTED_SUCCESS(VARNAME##OrErr);                                     \
  auto VARNAME = std::move(*VARNAME##OrErr);
#endif

View File

@@ -8,6 +8,7 @@
#include "concretelang/Support/CompilerEngine.h"
#include "concretelang/Support/JITSupport.h"
#include "end_to_end_test.h"
#include "globals.h"
#include "tests_tools/assert.h"
@@ -46,16 +47,6 @@ internalCheckedJit(llvm::StringRef src, llvm::StringRef func = "main",
return lambdaOrErr;
}
// Shorthands to create integer literals of a specific type
static inline uint8_t operator"" _u8(unsigned long long int v) { return v; }
static inline uint16_t operator"" _u16(unsigned long long int v) { return v; }
static inline uint32_t operator"" _u32(unsigned long long int v) { return v; }
static inline uint64_t operator"" _u64(unsigned long long int v) { return v; }
// Evaluates to the number of elements of a statically initialized
// array
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
// Wrapper around `internalCheckedJit` that causes
// `ASSERT_EXPECTED_SUCCESS` to use the file and line number of the
// caller instead of `internalCheckedJit`.

View File

@@ -0,0 +1,16 @@
#ifndef END_TO_END_TEST_H
#define END_TO_END_TEST_H

// Fixed-width integer types (uint8_t .. uint64_t) used by the literal
// operators below; include explicitly rather than relying on a
// transitive include from gtest.
#include <cstdint>

#include <gtest/gtest.h>

// Shorthands to create integer literals of a specific type
static inline uint8_t operator"" _u8(unsigned long long int v) { return v; }
static inline uint16_t operator"" _u16(unsigned long long int v) { return v; }
static inline uint32_t operator"" _u32(unsigned long long int v) { return v; }
static inline uint64_t operator"" _u64(unsigned long long int v) { return v; }

// Evaluates to the number of elements of a statically initialized
// array
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))

#endif // END_TO_END_TEST_H