diff --git a/compiler/Makefile b/compiler/Makefile
index d90ad9ad8..3ee64bb03 100644
--- a/compiler/Makefile
+++ b/compiler/Makefile
@@ -231,6 +231,14 @@ run-end-to-end-dataflow-tests: build-end-to-end-dataflow-tests
 	$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_jit_auto_parallelization
 	$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_jit_distributed
 
+## GPU tests
+
+build-end-to-end-gpu-tests: build-initialized
+	cmake --build $(BUILD_DIR) --target end_to_end_gpu_test
+
+run-end-to-end-gpu-tests: build-end-to-end-gpu-tests
+	$(BUILD_DIR)/tools/concretelang/tests/end_to_end_tests/end_to_end_gpu_test
+
 # benchmark
 
 build-benchmarks: build-initialized
diff --git a/compiler/include/concretelang/Conversion/ConcreteToBConcrete/Pass.h b/compiler/include/concretelang/Conversion/ConcreteToBConcrete/Pass.h
index aae7d3f4a..e39beaaff 100644
--- a/compiler/include/concretelang/Conversion/ConcreteToBConcrete/Pass.h
+++ b/compiler/include/concretelang/Conversion/ConcreteToBConcrete/Pass.h
@@ -12,7 +12,7 @@ namespace mlir {
 namespace concretelang {
 /// Create a pass to convert `Concrete` dialect to `BConcrete` dialect.
 std::unique_ptr>
-createConvertConcreteToBConcretePass(bool loopParallelize, bool useGPU);
+createConvertConcreteToBConcretePass(bool loopParallelize, bool emitGPUOps);
 } // namespace concretelang
 } // namespace mlir
diff --git a/compiler/include/concretelang/Dialect/BConcrete/IR/BConcreteOps.td b/compiler/include/concretelang/Dialect/BConcrete/IR/BConcreteOps.td
index bb2217e80..0af28d5d2 100644
--- a/compiler/include/concretelang/Dialect/BConcrete/IR/BConcreteOps.td
+++ b/compiler/include/concretelang/Dialect/BConcrete/IR/BConcreteOps.td
@@ -155,7 +155,7 @@ def BConcrete_AwaitFutureOp :
 
 // This is a different op in BConcrete just because of the way we are lowering to CAPI
 // When the CAPI lowering is detached from bufferization, we can remove this op, and lower
-// to the appropriate CAPI (gpu or cpu) depending on the useGPU compilation option
+// to the appropriate CAPI (gpu or cpu) depending on the emitGPUOps compilation option
 def BConcrete_BootstrapLweGPUBufferOp : BConcrete_Op<"bootstrap_lwe_gpu_buffer"> {
   let arguments = (ins
     1DTensorOf<[I64]>:$input_ciphertext,
@@ -172,7 +172,7 @@ def BConcrete_BootstrapLweGPUBufferOp : BConcrete_Op<"bootstrap_lwe_gpu_buffer">
 
 // This is a different op in BConcrete just because of the way we are lowering to CAPI
 // When the CAPI lowering is detached from bufferization, we can remove this op, and lower
-// to the appropriate CAPI (gpu or cpu) depending on the useGPU compilation option
+// to the appropriate CAPI (gpu or cpu) depending on the emitGPUOps compilation option
 def BConcrete_KeySwitchLweGPUBufferOp : BConcrete_Op<"keyswitch_lwe_gpu_buffer"> {
   let arguments = (ins
     1DTensorOf<[I64]>:$ciphertext,
diff --git a/compiler/include/concretelang/Support/CompilerEngine.h b/compiler/include/concretelang/Support/CompilerEngine.h
index e50c321e3..6ef2f289b 100644
--- a/compiler/include/concretelang/Support/CompilerEngine.h
+++ b/compiler/include/concretelang/Support/CompilerEngine.h
@@ -55,7 +55,7 @@ struct CompilationOptions {
   bool asyncOffload;
   bool optimizeConcrete;
   /// use GPU during execution by generating GPU operations if possible
-  bool useGPU;
+  bool emitGPUOps;
   llvm::Optional> fhelinalgTileSizes;
   llvm::Optional clientParametersFuncName;
 
@@ -66,7 +66,7 @@ struct CompilationOptions {
       : v0FHEConstraints(llvm::None), verifyDiagnostics(false),
        autoParallelize(false), loopParallelize(false),
        dataflowParallelize(false), asyncOffload(false), optimizeConcrete(true),
-        useGPU(false), clientParametersFuncName(llvm::None),
+        emitGPUOps(false), clientParametersFuncName(llvm::None),
         optimizerConfig(optimizer::DEFAULT_CONFIG){};
 
   CompilationOptions(std::string funcname) : CompilationOptions() {
diff --git a/compiler/include/concretelang/Support/Pipeline.h b/compiler/include/concretelang/Support/Pipeline.h
index 21c0276c5..ed85f5b76 100644
--- a/compiler/include/concretelang/Support/Pipeline.h
+++ b/compiler/include/concretelang/Support/Pipeline.h
@@ -47,7 +47,7 @@ lowerTFHEToConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
 mlir::LogicalResult
 lowerConcreteToBConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
                          std::function enablePass,
-                         bool parallelizeLoops, bool useGPU);
+                         bool parallelizeLoops, bool emitGPUOps);
 
 mlir::LogicalResult
 optimizeConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
diff --git a/compiler/lib/Conversion/ConcreteToBConcrete/ConcreteToBConcrete.cpp b/compiler/lib/Conversion/ConcreteToBConcrete/ConcreteToBConcrete.cpp
index a0d580165..dc138b2ab 100644
--- a/compiler/lib/Conversion/ConcreteToBConcrete/ConcreteToBConcrete.cpp
+++ b/compiler/lib/Conversion/ConcreteToBConcrete/ConcreteToBConcrete.cpp
@@ -48,12 +48,12 @@ struct ConcreteToBConcretePass : public ConcreteToBConcreteBase {
   void runOnOperation() final;
 
   ConcreteToBConcretePass() = delete;
-  ConcreteToBConcretePass(bool loopParallelize, bool useGPU)
-      : loopParallelize(loopParallelize), useGPU(useGPU){};
+  ConcreteToBConcretePass(bool loopParallelize, bool emitGPUOps)
+      : loopParallelize(loopParallelize), emitGPUOps(emitGPUOps){};
 
 private:
   bool loopParallelize;
-  bool useGPU;
+  bool emitGPUOps;
 };
 } // namespace
 
@@ -919,7 +919,7 @@ void ConcreteToBConcretePass::runOnOperation() {
                  LowToBConcrete>(&getContext());
 
-  if (this->useGPU) {
+  if (this->emitGPUOps) {
     patterns
         .insert>
-createConvertConcreteToBConcretePass(bool loopParallelize, bool useGPU) {
-  return std::make_unique(loopParallelize, useGPU);
+createConvertConcreteToBConcretePass(bool loopParallelize, bool emitGPUOps) {
+  return std::make_unique(loopParallelize, emitGPUOps);
 }
 } // namespace concretelang
 } // namespace mlir
diff --git a/compiler/lib/Support/CompilerEngine.cpp b/compiler/lib/Support/CompilerEngine.cpp
index fc5c0015f..244f4e331 100644
--- a/compiler/lib/Support/CompilerEngine.cpp
+++ b/compiler/lib/Support/CompilerEngine.cpp
@@ -356,7 +356,7 @@ CompilerEngine::compile(llvm::SourceMgr &sm, Target target, OptionalLib lib) {
   // Concrete -> BConcrete
   if (mlir::concretelang::pipeline::lowerConcreteToBConcrete(
           mlirContext, module, this->enablePass, loopParallelize,
-          options.useGPU)
+          options.emitGPUOps)
           .failed()) {
     return StreamStringError(
         "Lowering from Concrete to Bufferized Concrete failed");
diff --git a/compiler/lib/Support/Pipeline.cpp b/compiler/lib/Support/Pipeline.cpp
index 7ca12616f..a58c1a447 100644
--- a/compiler/lib/Support/Pipeline.cpp
+++ b/compiler/lib/Support/Pipeline.cpp
@@ -242,13 +242,13 @@ optimizeConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
 mlir::LogicalResult
 lowerConcreteToBConcrete(mlir::MLIRContext &context, mlir::ModuleOp &module,
                          std::function enablePass,
-                         bool parallelizeLoops, bool useGPU) {
+                         bool parallelizeLoops, bool emitGPUOps) {
   mlir::PassManager pm(&context);
   pipelinePrinting("ConcreteToBConcrete", pm, context);
 
   std::unique_ptr conversionPass =
       mlir::concretelang::createConvertConcreteToBConcretePass(parallelizeLoops,
-                                                               useGPU);
+                                                               emitGPUOps);
 
   bool passEnabled = enablePass(conversionPass.get());
diff --git a/compiler/src/main.cpp b/compiler/src/main.cpp
index 76b0873b8..52e0a3b87 100644
--- a/compiler/src/main.cpp
+++ b/compiler/src/main.cpp
@@ -98,8 +98,8 @@ llvm::cl::opt
                    "dialects. (Enabled by default)"),
     llvm::cl::init(true));
 
-llvm::cl::opt useGPU(
-    "use-gpu",
+llvm::cl::opt emitGPUOps(
+    "emit-gpu-ops",
     llvm::cl::desc(
         "enable/disable generating GPU operations (Disabled by default)"),
     llvm::cl::init(false));
@@ -289,7 +289,7 @@ cmdlineCompilationOptions() {
   options.loopParallelize = cmdline::loopParallelize;
   options.dataflowParallelize = cmdline::dataflowParallelize;
   options.optimizeConcrete = cmdline::optimizeConcrete;
-  options.useGPU = cmdline::useGPU;
+  options.emitGPUOps = cmdline::emitGPUOps;
 
   if (!cmdline::v0Constraint.empty()) {
     if (cmdline::v0Constraint.size() != 2) {
diff --git a/compiler/tests/check_tests/Conversion/ConcreteToBConcrete/gpu_ops.mlir b/compiler/tests/check_tests/Conversion/ConcreteToBConcrete/gpu_ops.mlir
index 42622555c..435f2ffb7 100644
--- a/compiler/tests/check_tests/Conversion/ConcreteToBConcrete/gpu_ops.mlir
+++ b/compiler/tests/check_tests/Conversion/ConcreteToBConcrete/gpu_ops.mlir
@@ -1,4 +1,4 @@
-// RUN: concretecompiler --passes concrete-to-bconcrete --action=dump-bconcrete --use-gpu %s 2>&1| FileCheck %s
+// RUN: concretecompiler --passes concrete-to-bconcrete --action=dump-bconcrete --emit-gpu-ops %s 2>&1| FileCheck %s
 
 //CHECK: func.func @main(%arg0: tensor<1025xi64>) -> tensor<1025xi64> {
diff --git a/compiler/tests/end_to_end_benchmarks/end_to_end_benchmark.cpp b/compiler/tests/end_to_end_benchmarks/end_to_end_benchmark.cpp
index 615660af4..6db099920 100644
--- a/compiler/tests/end_to_end_benchmarks/end_to_end_benchmark.cpp
+++ b/compiler/tests/end_to_end_benchmarks/end_to_end_benchmark.cpp
@@ -137,7 +137,7 @@ static int registerEndToEndTestFromFile(std::string prefix, std::string path,
   registe("loop", loop);
 #ifdef CONCRETELANG_CUDA_SUPPORT
   mlir::concretelang::CompilationOptions gpu;
-  gpu.useGPU = true;
+  gpu.emitGPUOps = true;
   registe("gpu", gpu);
 #endif
   // mlir::concretelang::CompilationOptions dataflow;
diff --git a/compiler/tests/end_to_end_benchmarks/end_to_end_mlbench.cpp b/compiler/tests/end_to_end_benchmarks/end_to_end_mlbench.cpp
index b9a01f91f..bde8f0b74 100644
--- a/compiler/tests/end_to_end_benchmarks/end_to_end_mlbench.cpp
+++ b/compiler/tests/end_to_end_benchmarks/end_to_end_mlbench.cpp
@@ -138,7 +138,7 @@ static int registerEndToEndTestFromFile(std::string prefix, std::string path,
   registe("loop", loop);
 #ifdef CONCRETELANG_CUDA_SUPPORT
   mlir::concretelang::CompilationOptions gpu;
-  gpu.useGPU = true;
+  gpu.emitGPUOps = true;
   registe("gpu", gpu);
 #endif
 #ifdef CONCRETELANG_DATAFLOW_EXECUTION_ENABLED
diff --git a/compiler/tests/end_to_end_tests/CMakeLists.txt b/compiler/tests/end_to_end_tests/CMakeLists.txt
index 88ca6fda1..0ca05beae 100644
--- a/compiler/tests/end_to_end_tests/CMakeLists.txt
+++ b/compiler/tests/end_to_end_tests/CMakeLists.txt
@@ -70,3 +70,11 @@ if(CONCRETELANG_DATAFLOW_EXECUTION_ENABLED)
     globals.cc
   )
 endif()
+
+if(CONCRETELANG_CUDA_SUPPORT)
+  add_concretecompiler_unittest(
+    end_to_end_gpu_test
+    end_to_end_gpu_test.cc
+    globals.cc
+  )
+endif()
diff --git a/compiler/tests/end_to_end_tests/end_to_end_gpu_test.cc b/compiler/tests/end_to_end_tests/end_to_end_gpu_test.cc
new file mode 100644
index 000000000..c4610c59d
--- /dev/null
+++ b/compiler/tests/end_to_end_tests/end_to_end_gpu_test.cc
@@ -0,0 +1,55 @@
+
+#include 
+#include 
+#include 
+
"end_to_end_gpu_test.h" +#include "tests_tools/GtestEnvironment.h" + +TEST(GPULookupTable, lut_precision2) { + checkedJit(lambda, R"XXX( +func.func @main(%arg0: !FHE.eint<2>) -> !FHE.eint<2> { + %arg1 = arith.constant dense<[1, 2, 3, 0]> : tensor<4xi64> + %1 = "FHE.apply_lookup_table"(%arg0, %arg1): (!FHE.eint<2>, tensor<4xi64>) -> (!FHE.eint<2>) + return %1: !FHE.eint<2> +} +)XXX", + "main", true); + + ASSERT_EXPECTED_VALUE(lambda(0_u64), (uint64_t)1); + ASSERT_EXPECTED_VALUE(lambda(1_u64), (uint64_t)2); + ASSERT_EXPECTED_VALUE(lambda(2_u64), (uint64_t)3); + ASSERT_EXPECTED_VALUE(lambda(3_u64), (uint64_t)0); +} + +TEST(GPULookupTable, lut_precision4) { + checkedJit(lambda, R"XXX( +func.func @main(%arg0: !FHE.eint<4>) -> !FHE.eint<4> { + %arg1 = arith.constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3, 2, 1, 0]> : tensor<16xi64> + %1 = "FHE.apply_lookup_table"(%arg0, %arg1): (!FHE.eint<4>, tensor<16xi64>) -> (!FHE.eint<4>) + return %1: !FHE.eint<4> +} +)XXX", + "main", true); + + ASSERT_EXPECTED_VALUE(lambda(0_u64), (uint64_t)1); + ASSERT_EXPECTED_VALUE(lambda(1_u64), (uint64_t)2); + ASSERT_EXPECTED_VALUE(lambda(7_u64), (uint64_t)8); + ASSERT_EXPECTED_VALUE(lambda(15_u64), (uint64_t)0); +} + +TEST(GPULookupTable, lut_precision7) { + checkedJit(lambda, R"XXX( +func.func @main(%arg0: !FHE.eint<7>) -> !FHE.eint<7> { + %arg1 = arith.constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0]> : tensor<128xi64> + %1 = "FHE.apply_lookup_table"(%arg0, %arg1): (!FHE.eint<7>, tensor<128xi64>) -> (!FHE.eint<7>) + return %1: !FHE.eint<7> +} +)XXX", + "main", true); + + ASSERT_EXPECTED_VALUE(lambda(0_u64), (uint64_t)1); + ASSERT_EXPECTED_VALUE(lambda(1_u64), (uint64_t)2); + ASSERT_EXPECTED_VALUE(lambda(120_u64), (uint64_t)121); + ASSERT_EXPECTED_VALUE(lambda(127_u64), (uint64_t)0); +} diff --git a/compiler/tests/end_to_end_tests/end_to_end_gpu_test.h b/compiler/tests/end_to_end_tests/end_to_end_gpu_test.h new file mode 100644 index 000000000..3712df0dd --- /dev/null +++ b/compiler/tests/end_to_end_tests/end_to_end_gpu_test.h @@ -0,0 +1,60 @@ +#ifndef END_TO_END_GPU_TEST_H +#define END_TO_END_GPU_TEST_H + +#include + +#include "../tests_tools/keySetCache.h" + +#include "concretelang/Support/CompilerEngine.h" +#include "concretelang/Support/JITSupport.h" + +#include "end_to_end_test.h" +#include "globals.h" +#include "tests_tools/assert.h" + +// Jit-compiles the function specified by `func` from `src` and +// returns the corresponding lambda. Any compilation errors are caught +// and reult in abnormal termination. 
+inline llvm::Expected<
+    mlir::concretelang::ClientServer>
+internalCheckedJit(llvm::StringRef src, llvm::StringRef func = "main",
+                   bool useDefaultFHEConstraints = false,
+                   bool dataflowParallelize = false,
+                   bool loopParallelize = false) {
+
+  auto options =
+      mlir::concretelang::CompilationOptions(std::string(func.data()));
+  options.emitGPUOps = true;
+
+  if (useDefaultFHEConstraints) {
+    options.v0FHEConstraints = defaultV0Constraints;
+    options.optimizerConfig.strategy_v0 = true;
+  }
+
+  // Allow loop parallelism in all cases
+  options.loopParallelize = loopParallelize;
+#ifdef CONCRETELANG_DATAFLOW_EXECUTION_ENABLED
+#ifdef CONCRETELANG_DATAFLOW_TESTING_ENABLED
+  options.dataflowParallelize = true;
+  options.loopParallelize = true;
+#else
+  options.dataflowParallelize = dataflowParallelize;
+#endif
+#endif
+
+  auto lambdaOrErr =
+      mlir::concretelang::ClientServer::create(
+          src, options, getTestKeySetCache(), mlir::concretelang::JITSupport());
+
+  return lambdaOrErr;
+}
+
+// Wrapper around `internalCheckedJit` that causes
+// `ASSERT_EXPECTED_SUCCESS` to use the file and line number of the
+// caller instead of `internalCheckedJit`.
+#define checkedJit(VARNAME, ...)                                               \
+  auto VARNAMEOrErr = internalCheckedJit(__VA_ARGS__);                         \
+  ASSERT_EXPECTED_SUCCESS(VARNAMEOrErr);                                       \
+  auto VARNAME = std::move(*VARNAMEOrErr);
+
+#endif
diff --git a/compiler/tests/end_to_end_tests/end_to_end_jit_test.h b/compiler/tests/end_to_end_tests/end_to_end_jit_test.h
index 1e41dd3c2..18e15b767 100644
--- a/compiler/tests/end_to_end_tests/end_to_end_jit_test.h
+++ b/compiler/tests/end_to_end_tests/end_to_end_jit_test.h
@@ -8,6 +8,7 @@
 #include "concretelang/Support/CompilerEngine.h"
 #include "concretelang/Support/JITSupport.h"
 
+#include "end_to_end_test.h"
 #include "globals.h"
 #include "tests_tools/assert.h"
 
@@ -46,16 +47,6 @@ internalCheckedJit(llvm::StringRef src, llvm::StringRef func = "main",
   return lambdaOrErr;
 }
 
-// Shorthands to create integer literals of a specific type
-static inline uint8_t operator"" _u8(unsigned long long int v) { return v; }
-static inline uint16_t operator"" _u16(unsigned long long int v) { return v; }
-static inline uint32_t operator"" _u32(unsigned long long int v) { return v; }
-static inline uint64_t operator"" _u64(unsigned long long int v) { return v; }
-
-// Evaluates to the number of elements of a statically initialized
-// array
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
-
 // Wrapper around `internalCheckedJit` that causes
 // `ASSERT_EXPECTED_SUCCESS` to use the file and line number of the
 // caller instead of `internalCheckedJit`.
diff --git a/compiler/tests/end_to_end_tests/end_to_end_test.h b/compiler/tests/end_to_end_tests/end_to_end_test.h
new file mode 100644
index 000000000..29bd5bfff
--- /dev/null
+++ b/compiler/tests/end_to_end_tests/end_to_end_test.h
@@ -0,0 +1,16 @@
+#ifndef END_TO_END_TEST_H
+#define END_TO_END_TEST_H
+
+#include 
+
+// Shorthands to create integer literals of a specific type
+static inline uint8_t operator"" _u8(unsigned long long int v) { return v; }
+static inline uint16_t operator"" _u16(unsigned long long int v) { return v; }
+static inline uint32_t operator"" _u32(unsigned long long int v) { return v; }
+static inline uint64_t operator"" _u64(unsigned long long int v) { return v; }
+
+// Evaluates to the number of elements of a statically initialized
+// array
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+
+#endif
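Note on adding further GPU cases: `checkedJit` in end_to_end_gpu_test.h JIT-compiles the given `main` with `options.emitGPUOps = true`, and each `TEST` then checks the returned lambda against its lookup table. The sketch below is illustrative only and not part of this patch; the 3-bit table, its values, and the test name `lut_precision3` are made up to show how another case could be added to end_to_end_gpu_test.cc.

// Illustrative sketch (not in this patch): a hypothetical 3-bit lookup-table
// test in the style of lut_precision2/4/7 above.
TEST(GPULookupTable, lut_precision3) {
  // checkedJit compiles `main` with emitGPUOps enabled (see
  // end_to_end_gpu_test.h) and binds the resulting lambda.
  checkedJit(lambda, R"XXX(
func.func @main(%arg0: !FHE.eint<3>) -> !FHE.eint<3> {
  %arg1 = arith.constant dense<[7, 6, 5, 4, 3, 2, 1, 0]> : tensor<8xi64>
  %1 = "FHE.apply_lookup_table"(%arg0, %arg1): (!FHE.eint<3>, tensor<8xi64>) -> (!FHE.eint<3>)
  return %1: !FHE.eint<3>
}
)XXX",
             "main", true);

  // The table reverses its index, so input i should map to 7 - i.
  ASSERT_EXPECTED_VALUE(lambda(0_u64), (uint64_t)7);
  ASSERT_EXPECTED_VALUE(lambda(3_u64), (uint64_t)4);
  ASSERT_EXPECTED_VALUE(lambda(7_u64), (uint64_t)0);
}

On a build configured with CONCRETELANG_CUDA_SUPPORT, such a case is compiled by the new CMake unittest target and picked up by `make run-end-to-end-gpu-tests`.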