mirror of
https://github.com/zama-ai/concrete.git
synced 2026-02-09 03:55:04 -05:00
test: add GPU end2end tests
This commit is contained in:
@@ -231,6 +231,14 @@ run-end-to-end-dataflow-tests: build-end-to-end-dataflow-tests
|
||||
$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_jit_auto_parallelization
|
||||
$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_jit_distributed
|
||||
|
||||
## GPU tests
|
||||
|
||||
build-end-to-end-gpu-tests: build-initialized
|
||||
cmake --build $(BUILD_DIR) --target end_to_end_gpu_test
|
||||
|
||||
run-end-to-end-gpu-tests: build-end-to-end-gpu-tests
|
||||
$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_gpu_test
|
||||
|
||||
# benchmark
|
||||
|
||||
build-benchmarks: build-initialized
|
||||
|
||||
@@ -12,7 +12,7 @@ namespace mlir {
|
||||
namespace concretelang {
|
||||
/// Create a pass to convert `Concrete` dialect to `BConcrete` dialect.
|
||||
std::unique_ptr<OperationPass<ModuleOp>>
|
||||
createConvertConcreteToBConcretePass(bool loopParallelize, bool useGPU);
|
||||
createConvertConcreteToBConcretePass(bool loopParallelize, bool emitGPUOps);
|
||||
} // namespace concretelang
|
||||
} // namespace mlir
|
||||
|
||||
|
||||
@@ -155,7 +155,7 @@ def BConcrete_AwaitFutureOp :
|
||||
|
||||
// This is a different op in BConcrete just because of the way we are lowering to CAPI
|
||||
// When the CAPI lowering is detached from bufferization, we can remove this op, and lower
|
||||
// to the appropriate CAPI (gpu or cpu) depending on the useGPU compilation option
|
||||
// to the appropriate CAPI (gpu or cpu) depending on the emitGPUOps compilation option
|
||||
def BConcrete_BootstrapLweGPUBufferOp : BConcrete_Op<"bootstrap_lwe_gpu_buffer"> {
|
||||
let arguments = (ins
|
||||
1DTensorOf<[I64]>:$input_ciphertext,
|
||||
@@ -172,7 +172,7 @@ def BConcrete_BootstrapLweGPUBufferOp : BConcrete_Op<"bootstrap_lwe_gpu_buffer">
|
||||
|
||||
// This is a different op in BConcrete just because of the way we are lowering to CAPI
|
||||
// When the CAPI lowering is detached from bufferization, we can remove this op, and lower
|
||||
// to the appropriate CAPI (gpu or cpu) depending on the useGPU compilation option
|
||||
// to the appropriate CAPI (gpu or cpu) depending on the emitGPUOps compilation option
|
||||
def BConcrete_KeySwitchLweGPUBufferOp : BConcrete_Op<"keyswitch_lwe_gpu_buffer"> {
|
||||
let arguments = (ins
|
||||
1DTensorOf<[I64]>:$ciphertext,
|
||||
|
||||
@@ -55,7 +55,7 @@ struct CompilationOptions {
|
||||
bool asyncOffload;
|
||||
bool optimizeConcrete;
|
||||
/// use GPU during execution by generating GPU operations if possible
|
||||
bool useGPU;
|
||||
bool emitGPUOps;
|
||||
llvm::Optional<std::vector<int64_t>> fhelinalgTileSizes;
|
||||
|
||||
llvm::Optional<std::string> clientParametersFuncName;
|
||||
@@ -66,7 +66,7 @@ struct CompilationOptions {
|
||||
: v0FHEConstraints(llvm::None), verifyDiagnostics(false),
|
||||
autoParallelize(false), loopParallelize(false),
|
||||
dataflowParallelize(false), asyncOffload(false), optimizeConcrete(true),
|
||||
useGPU(false), clientParametersFuncName(llvm::None),
|
||||
emitGPUOps(false), clientParametersFuncName(llvm::None),
|
||||
optimizerConfig(optimizer::DEFAULT_CONFIG){};
|
||||
|
||||
CompilationOptions(std::string funcname) : CompilationOptions() {
|
||||
|
||||
@@ -47,7 +47,7 @@ lowerTFHEToConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
|
||||
mlir::LogicalResult
|
||||
lowerConcreteToBConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
|
||||
std::function<bool(mlir::Pass *)> enablePass,
|
||||
bool parallelizeLoops, bool useGPU);
|
||||
bool parallelizeLoops, bool emitGPUOps);
|
||||
|
||||
mlir::LogicalResult
|
||||
optimizeConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
|
||||
|
||||
@@ -48,12 +48,12 @@ struct ConcreteToBConcretePass
|
||||
: public ConcreteToBConcreteBase<ConcreteToBConcretePass> {
|
||||
void runOnOperation() final;
|
||||
ConcreteToBConcretePass() = delete;
|
||||
ConcreteToBConcretePass(bool loopParallelize, bool useGPU)
|
||||
: loopParallelize(loopParallelize), useGPU(useGPU){};
|
||||
ConcreteToBConcretePass(bool loopParallelize, bool emitGPUOps)
|
||||
: loopParallelize(loopParallelize), emitGPUOps(emitGPUOps){};
|
||||
|
||||
private:
|
||||
bool loopParallelize;
|
||||
bool useGPU;
|
||||
bool emitGPUOps;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
@@ -919,7 +919,7 @@ void ConcreteToBConcretePass::runOnOperation() {
|
||||
LowToBConcrete<Concrete::WopPBSLweOp, BConcrete::WopPBSCRTLweBufferOp,
|
||||
BConcrete::WopPBSCRTLweBufferOp>>(&getContext());
|
||||
|
||||
if (this->useGPU) {
|
||||
if (this->emitGPUOps) {
|
||||
patterns
|
||||
.insert<LowToBConcrete<
|
||||
mlir::concretelang::Concrete::BootstrapLweOp,
|
||||
@@ -1063,8 +1063,8 @@ void ConcreteToBConcretePass::runOnOperation() {
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
std::unique_ptr<OperationPass<ModuleOp>>
|
||||
createConvertConcreteToBConcretePass(bool loopParallelize, bool useGPU) {
|
||||
return std::make_unique<ConcreteToBConcretePass>(loopParallelize, useGPU);
|
||||
createConvertConcreteToBConcretePass(bool loopParallelize, bool emitGPUOps) {
|
||||
return std::make_unique<ConcreteToBConcretePass>(loopParallelize, emitGPUOps);
|
||||
}
|
||||
} // namespace concretelang
|
||||
} // namespace mlir
|
||||
|
||||
@@ -356,7 +356,7 @@ CompilerEngine::compile(llvm::SourceMgr &sm, Target target, OptionalLib lib) {
|
||||
// Concrete -> BConcrete
|
||||
if (mlir::concretelang::pipeline::lowerConcreteToBConcrete(
|
||||
mlirContext, module, this->enablePass, loopParallelize,
|
||||
options.useGPU)
|
||||
options.emitGPUOps)
|
||||
.failed()) {
|
||||
return StreamStringError(
|
||||
"Lowering from Concrete to Bufferized Concrete failed");
|
||||
|
||||
@@ -242,13 +242,13 @@ optimizeConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
|
||||
mlir::LogicalResult
|
||||
lowerConcreteToBConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
|
||||
std::function<bool(mlir::Pass *)> enablePass,
|
||||
bool parallelizeLoops, bool useGPU) {
|
||||
bool parallelizeLoops, bool emitGPUOps) {
|
||||
mlir::PassManager pm(&context);
|
||||
pipelinePrinting("ConcreteToBConcrete", pm, context);
|
||||
|
||||
std::unique_ptr<Pass> conversionPass =
|
||||
mlir::concretelang::createConvertConcreteToBConcretePass(parallelizeLoops,
|
||||
useGPU);
|
||||
emitGPUOps);
|
||||
|
||||
bool passEnabled = enablePass(conversionPass.get());
|
||||
|
||||
|
||||
@@ -98,8 +98,8 @@ llvm::cl::opt<bool>
|
||||
"dialects. (Enabled by default)"),
|
||||
llvm::cl::init<bool>(true));
|
||||
|
||||
llvm::cl::opt<bool> useGPU(
|
||||
"use-gpu",
|
||||
llvm::cl::opt<bool> emitGPUOps(
|
||||
"emit-gpu-ops",
|
||||
llvm::cl::desc(
|
||||
"enable/disable generating GPU operations (Disabled by default)"),
|
||||
llvm::cl::init<bool>(false));
|
||||
@@ -289,7 +289,7 @@ cmdlineCompilationOptions() {
|
||||
options.loopParallelize = cmdline::loopParallelize;
|
||||
options.dataflowParallelize = cmdline::dataflowParallelize;
|
||||
options.optimizeConcrete = cmdline::optimizeConcrete;
|
||||
options.useGPU = cmdline::useGPU;
|
||||
options.emitGPUOps = cmdline::emitGPUOps;
|
||||
|
||||
if (!cmdline::v0Constraint.empty()) {
|
||||
if (cmdline::v0Constraint.size() != 2) {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// RUN: concretecompiler --passes concrete-to-bconcrete --action=dump-bconcrete --use-gpu %s 2>&1| FileCheck %s
|
||||
// RUN: concretecompiler --passes concrete-to-bconcrete --action=dump-bconcrete --emit-gpu-ops %s 2>&1| FileCheck %s
|
||||
|
||||
|
||||
//CHECK: func.func @main(%arg0: tensor<1025xi64>) -> tensor<1025xi64> {
|
||||
|
||||
@@ -137,7 +137,7 @@ static int registerEndToEndTestFromFile(std::string prefix, std::string path,
|
||||
registe("loop", loop);
|
||||
#ifdef CONCRETELANG_CUDA_SUPPORT
|
||||
mlir::concretelang::CompilationOptions gpu;
|
||||
gpu.useGPU = true;
|
||||
gpu.emitGPUOps = true;
|
||||
registe("gpu", gpu);
|
||||
#endif
|
||||
// mlir::concretelang::CompilationOptions dataflow;
|
||||
|
||||
@@ -138,7 +138,7 @@ static int registerEndToEndTestFromFile(std::string prefix, std::string path,
|
||||
registe("loop", loop);
|
||||
#ifdef CONCRETELANG_CUDA_SUPPORT
|
||||
mlir::concretelang::CompilationOptions gpu;
|
||||
gpu.useGPU = true;
|
||||
gpu.emitGPUOps = true;
|
||||
registe("gpu", gpu);
|
||||
#endif
|
||||
#ifdef CONCRETELANG_DATAFLOW_EXECUTION_ENABLED
|
||||
|
||||
@@ -70,3 +70,11 @@ if(CONCRETELANG_DATAFLOW_EXECUTION_ENABLED)
|
||||
globals.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
if(CONCRETELANG_CUDA_SUPPORT)
|
||||
add_concretecompiler_unittest(
|
||||
end_to_end_gpu_test
|
||||
end_to_end_gpu_test.cc
|
||||
globals.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
55
compiler/tests/end_to_end_tests/end_to_end_gpu_test.cc
Normal file
55
compiler/tests/end_to_end_tests/end_to_end_gpu_test.cc
Normal file
@@ -0,0 +1,55 @@
|
||||
|
||||
#include <cstdint>
|
||||
#include <gtest/gtest.h>
|
||||
#include <type_traits>
|
||||
|
||||
#include "end_to_end_gpu_test.h"
|
||||
#include "tests_tools/GtestEnvironment.h"
|
||||
|
||||
// Compiles a program applying the 4-entry lookup table [1, 2, 3, 0] to a
// 2-bit encrypted integer (entry point "main", default FHE constraints
// requested through the trailing `true`), then checks the result for every
// input 0..3 against the matching table entry.
TEST(GPULookupTable, lut_precision2) {
|
||||
checkedJit(lambda, R"XXX(
|
||||
func.func @main(%arg0: !FHE.eint<2>) -> !FHE.eint<2> {
|
||||
%arg1 = arith.constant dense<[1, 2, 3, 0]> : tensor<4xi64>
|
||||
%1 = "FHE.apply_lookup_table"(%arg0, %arg1): (!FHE.eint<2>, tensor<4xi64>) -> (!FHE.eint<2>)
|
||||
return %1: !FHE.eint<2>
|
||||
}
|
||||
)XXX",
|
||||
"main", true);
|
||||
|
||||
ASSERT_EXPECTED_VALUE(lambda(0_u64), (uint64_t)1);
|
||||
ASSERT_EXPECTED_VALUE(lambda(1_u64), (uint64_t)2);
|
||||
ASSERT_EXPECTED_VALUE(lambda(2_u64), (uint64_t)3);
|
||||
ASSERT_EXPECTED_VALUE(lambda(3_u64), (uint64_t)0);
|
||||
}
|
||||
|
||||
// Compiles a program applying a 16-entry lookup table (1..8 followed by
// 7..0) to a 4-bit encrypted integer (entry point "main", default FHE
// constraints requested through the trailing `true`), then spot-checks the
// inputs 0, 1, 7 and 15 against the matching table entries.
TEST(GPULookupTable, lut_precision4) {
|
||||
checkedJit(lambda, R"XXX(
|
||||
func.func @main(%arg0: !FHE.eint<4>) -> !FHE.eint<4> {
|
||||
%arg1 = arith.constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3, 2, 1, 0]> : tensor<16xi64>
|
||||
%1 = "FHE.apply_lookup_table"(%arg0, %arg1): (!FHE.eint<4>, tensor<16xi64>) -> (!FHE.eint<4>)
|
||||
return %1: !FHE.eint<4>
|
||||
}
|
||||
)XXX",
|
||||
"main", true);
|
||||
|
||||
ASSERT_EXPECTED_VALUE(lambda(0_u64), (uint64_t)1);
|
||||
ASSERT_EXPECTED_VALUE(lambda(1_u64), (uint64_t)2);
|
||||
ASSERT_EXPECTED_VALUE(lambda(7_u64), (uint64_t)8);
|
||||
ASSERT_EXPECTED_VALUE(lambda(15_u64), (uint64_t)0);
|
||||
}
|
||||
|
||||
// Compiles a program applying a 128-entry lookup table (1..127 followed by
// 0) to a 7-bit encrypted integer (entry point "main", default FHE
// constraints requested through the trailing `true`), then spot-checks the
// inputs 0, 1, 120 and 127 against the matching table entries.
TEST(GPULookupTable, lut_precision7) {
|
||||
checkedJit(lambda, R"XXX(
|
||||
func.func @main(%arg0: !FHE.eint<7>) -> !FHE.eint<7> {
|
||||
%arg1 = arith.constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0]> : tensor<128xi64>
|
||||
%1 = "FHE.apply_lookup_table"(%arg0, %arg1): (!FHE.eint<7>, tensor<128xi64>) -> (!FHE.eint<7>)
|
||||
return %1: !FHE.eint<7>
|
||||
}
|
||||
)XXX",
|
||||
"main", true);
|
||||
|
||||
ASSERT_EXPECTED_VALUE(lambda(0_u64), (uint64_t)1);
|
||||
ASSERT_EXPECTED_VALUE(lambda(1_u64), (uint64_t)2);
|
||||
ASSERT_EXPECTED_VALUE(lambda(120_u64), (uint64_t)121);
|
||||
ASSERT_EXPECTED_VALUE(lambda(127_u64), (uint64_t)0);
|
||||
}
|
||||
60
compiler/tests/end_to_end_tests/end_to_end_gpu_test.h
Normal file
60
compiler/tests/end_to_end_tests/end_to_end_gpu_test.h
Normal file
@@ -0,0 +1,60 @@
|
||||
#ifndef END_TO_END_GPU_TEST_H
|
||||
#define END_TO_END_GPU_TEST_H
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../tests_tools/keySetCache.h"
|
||||
|
||||
#include "concretelang/Support/CompilerEngine.h"
|
||||
#include "concretelang/Support/JITSupport.h"
|
||||
|
||||
#include "end_to_end_test.h"
|
||||
#include "globals.h"
|
||||
#include "tests_tools/assert.h"
|
||||
|
||||
// Jit-compiles the function specified by `func` from `src` and
|
||||
// returns the corresponding lambda. Any compilation errors are caught
|
||||
// and reult in abnormal termination.
|
||||
inline llvm::Expected<
|
||||
mlir::concretelang::ClientServer<mlir::concretelang::JITSupport>>
|
||||
internalCheckedJit(llvm::StringRef src, llvm::StringRef func = "main",
|
||||
bool useDefaultFHEConstraints = false,
|
||||
bool dataflowParallelize = false,
|
||||
bool loopParallelize = false) {
|
||||
|
||||
auto options =
|
||||
mlir::concretelang::CompilationOptions(std::string(func.data()));
|
||||
options.emitGPUOps = true;
|
||||
|
||||
if (useDefaultFHEConstraints) {
|
||||
options.v0FHEConstraints = defaultV0Constraints;
|
||||
options.optimizerConfig.strategy_v0 = true;
|
||||
}
|
||||
|
||||
// Allow loop parallelism in all cases
|
||||
options.loopParallelize = loopParallelize;
|
||||
#ifdef CONCRETELANG_DATAFLOW_EXECUTION_ENABLED
|
||||
#ifdef CONCRETELANG_DATAFLOW_TESTING_ENABLED
|
||||
options.dataflowParallelize = true;
|
||||
options.loopParallelize = true;
|
||||
#else
|
||||
options.dataflowParallelize = dataflowParallelize;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
auto lambdaOrErr =
|
||||
mlir::concretelang::ClientServer<mlir::concretelang::JITSupport>::create(
|
||||
src, options, getTestKeySetCache(), mlir::concretelang::JITSupport());
|
||||
|
||||
return lambdaOrErr;
|
||||
}
|
||||
|
||||
// Wrapper around `internalCheckedJit` that causes
// `ASSERT_EXPECTED_SUCCESS` to use the file and line number of the
// caller instead of `internalCheckedJit`.
//
// Declares two variables in the calling scope: `<VARNAME>OrErr`, holding
// the result of `internalCheckedJit`, and `VARNAME`, holding the value moved
// out of it. The helper name is token-pasted from VARNAME so the macro can
// be used more than once in the same scope with different names.
#define checkedJit(VARNAME, ...)                                               \
  auto VARNAME##OrErr = internalCheckedJit(__VA_ARGS__);                       \
  ASSERT_EXPECTED_SUCCESS(VARNAME##OrErr);                                     \
  auto VARNAME = std::move(*VARNAME##OrErr);
|
||||
|
||||
#endif
|
||||
@@ -8,6 +8,7 @@
|
||||
#include "concretelang/Support/CompilerEngine.h"
|
||||
#include "concretelang/Support/JITSupport.h"
|
||||
|
||||
#include "end_to_end_test.h"
|
||||
#include "globals.h"
|
||||
#include "tests_tools/assert.h"
|
||||
|
||||
@@ -46,16 +47,6 @@ internalCheckedJit(llvm::StringRef src, llvm::StringRef func = "main",
|
||||
return lambdaOrErr;
|
||||
}
|
||||
|
||||
// Shorthands to create integer literals of a specific type
|
||||
static inline uint8_t operator"" _u8(unsigned long long int v) { return v; }
|
||||
static inline uint16_t operator"" _u16(unsigned long long int v) { return v; }
|
||||
static inline uint32_t operator"" _u32(unsigned long long int v) { return v; }
|
||||
static inline uint64_t operator"" _u64(unsigned long long int v) { return v; }
|
||||
|
||||
// Evaluates to the number of elements of a statically initialized
|
||||
// array
|
||||
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
|
||||
|
||||
// Wrapper around `internalCheckedJit` that causes
|
||||
// `ASSERT_EXPECTED_SUCCESS` to use the file and line number of the
|
||||
// caller instead of `internalCheckedJit`.
|
||||
|
||||
16
compiler/tests/end_to_end_tests/end_to_end_test.h
Normal file
16
compiler/tests/end_to_end_tests/end_to_end_test.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef END_TO_END_TEST_H
|
||||
#define END_TO_END_TEST_H
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
// Shorthands to create integer literals of a specific type. The literal's
// value is converted to the target type explicitly, so values that do not
// fit wrap around modulo 2^N as for any unsigned conversion.
// The suffix is written directly after `""`: the form with whitespace in
// between is deprecated.
static inline uint8_t operator""_u8(unsigned long long int v) {
  return static_cast<uint8_t>(v);
}
static inline uint16_t operator""_u16(unsigned long long int v) {
  return static_cast<uint16_t>(v);
}
static inline uint32_t operator""_u32(unsigned long long int v) {
  return static_cast<uint32_t>(v);
}
static inline uint64_t operator""_u64(unsigned long long int v) {
  return static_cast<uint64_t>(v);
}
|
||||
|
||||
// Element count of a statically sized array: the array's total size in
// bytes divided by the size of its first element.
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(*(arr)))
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user