// Part of the Concrete Compiler Project, under the BSD3 License with Zama // Exceptions. See // https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt // for license information. /// This file implements the dataflow runtime. It encapsulates all of /// the underlying communication, parallelism, etc. and only exposes a /// simplified interface for code generation in runtime_api.h /// This hides the details of implementation, including of the HPX /// framework currently used, from the code generation side. #ifdef CONCRETELANG_DATAFLOW_EXECUTION_ENABLED #include #include #include #include #include #include #include #include "concretelang/Runtime/DFRuntime.hpp" #include "concretelang/Runtime/distributed_generic_task_server.hpp" #include "concretelang/Runtime/runtime_api.h" #include "concretelang/Runtime/time_util.h" namespace mlir { namespace concretelang { namespace dfr { namespace { static std::vector gcc; static hpx::lcos::barrier *_dfr_jit_phase_barrier; static hpx::lcos::barrier *_dfr_startup_barrier; static size_t num_nodes = 0; #if CONCRETELANG_TIMING_ENABLED static struct timespec init_timer, broadcast_timer, compute_timer, whole_timer; #endif } // namespace } // namespace dfr } // namespace concretelang } // namespace mlir using namespace hpx; typedef struct dfr_refcounted_future { hpx::shared_future *future; std::atomic count; bool cloned_memref_p; dfr_refcounted_future(hpx::shared_future *f, size_t c, bool clone_p) : future(f), count(c), cloned_memref_p(clone_p) {} } dfr_refcounted_future_t, *dfr_refcounted_future_p; // Ready futures are only used as inputs to tasks (never passed to // await_future), so we only need to track the references in task // creation. void *_dfr_make_ready_future(void *in, size_t memref_clone_p) { hpx::future future = hpx::make_ready_future(in); return (void *)new dfr_refcounted_future_t( new hpx::shared_future(std::move(future)), 1, memref_clone_p); } void *_dfr_await_future(void *in) { return static_cast(in)->future->get(); } void _dfr_deallocate_future(void *in) { auto drf = static_cast(in); size_t prev_count = drf->count.fetch_sub(1); if (prev_count == 1) { // If this was a memref for which a clone was needed, deallocate first. if (drf->cloned_memref_p) free( (void *)(static_cast *>(drf->future->get()) ->data)); free(drf->future->get()); delete (drf->future); delete drf; } } void _dfr_deallocate_future_data(void *in) {} // Determine where new task should run. For now just round-robin // distribution - TODO: optimise. static inline size_t _dfr_find_next_execution_locality() { static std::atomic next_locality{1}; size_t next_loc = next_locality.fetch_add(1); return next_loc % mlir::concretelang::dfr::num_nodes; } /// Runtime generic async_task. Each first NUM_PARAMS pairs of /// arguments in the variadic list corresponds to a void* pointer on a /// hpx::future and the size of data within the future. After /// that come NUM_OUTPUTS pairs of hpx::future* and size_t for /// the returns. void _dfr_create_async_task(wfnptr wfn, void *ctx, size_t num_params, size_t num_outputs, ...) { std::vector refcounted_futures; std::vector param_sizes; std::vector param_types; std::vector outputs; std::vector output_sizes; std::vector output_types; va_list args; va_start(args, num_outputs); for (size_t i = 0; i < num_outputs; ++i) { outputs.push_back(va_arg(args, void *)); output_sizes.push_back(va_arg(args, uint64_t)); output_types.push_back(va_arg(args, uint64_t)); } for (size_t i = 0; i < num_params; ++i) { refcounted_futures.push_back(va_arg(args, void *)); param_sizes.push_back(va_arg(args, uint64_t)); param_types.push_back(va_arg(args, uint64_t)); } va_end(args); // Take a reference on each future argument for (auto rcf : refcounted_futures) ((dfr_refcounted_future_p)rcf)->count.fetch_add(1); // We pass functions by name - which is not strictly necessary in // shared memory as pointers suffice, but is needed in the // distributed case where the functions need to be located/loaded on // the node. auto wfnname = mlir::concretelang::dfr::_dfr_node_level_work_function_registry ->getWorkFunctionName((void *)wfn); hpx::future> oodf; // In order to allow complete dataflow semantics for // communication/synchronization, we split tasks in two parts: an // execution body that is scheduled once all input dependences are // satisfied, which generates a future on a tuple of outputs, which // is then further split into a tuple of futures and provide // individual synchronization for each return independently. mlir::concretelang::dfr::GenericComputeClient *gcc_target = &mlir::concretelang::dfr::gcc[_dfr_find_next_execution_locality()]; switch (num_params) { case 0: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx]() -> hpx::future { std::vector params = {}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); })); break; case 1: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0) -> hpx::future { std::vector params = {param0.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future)); break; case 2: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1) -> hpx::future { std::vector params = {param0.get(), param1.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future)); break; case 3: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2) -> hpx::future { std::vector params = {param0.get(), param1.get(), param2.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future)); break; case 4: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3) -> hpx::future { std::vector params = {param0.get(), param1.get(), param2.get(), param3.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future)); break; case 5: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4) -> hpx::future { std::vector params = {param0.get(), param1.get(), param2.get(), param3.get(), param4.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future)); break; case 6: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5) -> hpx::future { std::vector params = {param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future)); break; case 7: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future)); break; case 8: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future)); break; case 9: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7, hpx::shared_future param8) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get(), param8.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future, *((dfr_refcounted_future_p)refcounted_futures[8])->future)); break; case 10: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7, hpx::shared_future param8, hpx::shared_future param9) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get(), param8.get(), param9.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future, *((dfr_refcounted_future_p)refcounted_futures[8])->future, *((dfr_refcounted_future_p)refcounted_futures[9])->future)); break; case 11: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7, hpx::shared_future param8, hpx::shared_future param9, hpx::shared_future param10) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get(), param8.get(), param9.get(), param10.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future, *((dfr_refcounted_future_p)refcounted_futures[8])->future, *((dfr_refcounted_future_p)refcounted_futures[9])->future, *((dfr_refcounted_future_p)refcounted_futures[10])->future)); break; case 12: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7, hpx::shared_future param8, hpx::shared_future param9, hpx::shared_future param10, hpx::shared_future param11) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get(), param8.get(), param9.get(), param10.get(), param11.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future, *((dfr_refcounted_future_p)refcounted_futures[8])->future, *((dfr_refcounted_future_p)refcounted_futures[9])->future, *((dfr_refcounted_future_p)refcounted_futures[10])->future, *((dfr_refcounted_future_p)refcounted_futures[11])->future)); break; case 13: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7, hpx::shared_future param8, hpx::shared_future param9, hpx::shared_future param10, hpx::shared_future param11, hpx::shared_future param12) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get(), param8.get(), param9.get(), param10.get(), param11.get(), param12.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future, *((dfr_refcounted_future_p)refcounted_futures[8])->future, *((dfr_refcounted_future_p)refcounted_futures[9])->future, *((dfr_refcounted_future_p)refcounted_futures[10])->future, *((dfr_refcounted_future_p)refcounted_futures[11])->future, *((dfr_refcounted_future_p)refcounted_futures[12])->future)); break; case 14: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7, hpx::shared_future param8, hpx::shared_future param9, hpx::shared_future param10, hpx::shared_future param11, hpx::shared_future param12, hpx::shared_future param13) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get(), param8.get(), param9.get(), param10.get(), param11.get(), param12.get(), param13.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future, *((dfr_refcounted_future_p)refcounted_futures[8])->future, *((dfr_refcounted_future_p)refcounted_futures[9])->future, *((dfr_refcounted_future_p)refcounted_futures[10])->future, *((dfr_refcounted_future_p)refcounted_futures[11])->future, *((dfr_refcounted_future_p)refcounted_futures[12])->future, *((dfr_refcounted_future_p)refcounted_futures[13])->future)); break; case 15: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7, hpx::shared_future param8, hpx::shared_future param9, hpx::shared_future param10, hpx::shared_future param11, hpx::shared_future param12, hpx::shared_future param13, hpx::shared_future param14) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get(), param8.get(), param9.get(), param10.get(), param11.get(), param12.get(), param13.get(), param14.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future, *((dfr_refcounted_future_p)refcounted_futures[8])->future, *((dfr_refcounted_future_p)refcounted_futures[9])->future, *((dfr_refcounted_future_p)refcounted_futures[10])->future, *((dfr_refcounted_future_p)refcounted_futures[11])->future, *((dfr_refcounted_future_p)refcounted_futures[12])->future, *((dfr_refcounted_future_p)refcounted_futures[13])->future, *((dfr_refcounted_future_p)refcounted_futures[14])->future)); break; case 16: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7, hpx::shared_future param8, hpx::shared_future param9, hpx::shared_future param10, hpx::shared_future param11, hpx::shared_future param12, hpx::shared_future param13, hpx::shared_future param14, hpx::shared_future param15) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get(), param8.get(), param9.get(), param10.get(), param11.get(), param12.get(), param13.get(), param14.get(), param15.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future, *((dfr_refcounted_future_p)refcounted_futures[8])->future, *((dfr_refcounted_future_p)refcounted_futures[9])->future, *((dfr_refcounted_future_p)refcounted_futures[10])->future, *((dfr_refcounted_future_p)refcounted_futures[11])->future, *((dfr_refcounted_future_p)refcounted_futures[12])->future, *((dfr_refcounted_future_p)refcounted_futures[13])->future, *((dfr_refcounted_future_p)refcounted_futures[14])->future, *((dfr_refcounted_future_p)refcounted_futures[15])->future)); break; case 17: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7, hpx::shared_future param8, hpx::shared_future param9, hpx::shared_future param10, hpx::shared_future param11, hpx::shared_future param12, hpx::shared_future param13, hpx::shared_future param14, hpx::shared_future param15, hpx::shared_future param16) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get(), param8.get(), param9.get(), param10.get(), param11.get(), param12.get(), param13.get(), param14.get(), param15.get(), param16.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future, *((dfr_refcounted_future_p)refcounted_futures[8])->future, *((dfr_refcounted_future_p)refcounted_futures[9])->future, *((dfr_refcounted_future_p)refcounted_futures[10])->future, *((dfr_refcounted_future_p)refcounted_futures[11])->future, *((dfr_refcounted_future_p)refcounted_futures[12])->future, *((dfr_refcounted_future_p)refcounted_futures[13])->future, *((dfr_refcounted_future_p)refcounted_futures[14])->future, *((dfr_refcounted_future_p)refcounted_futures[15])->future, *((dfr_refcounted_future_p)refcounted_futures[16])->future)); break; case 18: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7, hpx::shared_future param8, hpx::shared_future param9, hpx::shared_future param10, hpx::shared_future param11, hpx::shared_future param12, hpx::shared_future param13, hpx::shared_future param14, hpx::shared_future param15, hpx::shared_future param16, hpx::shared_future param17) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get(), param8.get(), param9.get(), param10.get(), param11.get(), param12.get(), param13.get(), param14.get(), param15.get(), param16.get(), param17.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future, *((dfr_refcounted_future_p)refcounted_futures[8])->future, *((dfr_refcounted_future_p)refcounted_futures[9])->future, *((dfr_refcounted_future_p)refcounted_futures[10])->future, *((dfr_refcounted_future_p)refcounted_futures[11])->future, *((dfr_refcounted_future_p)refcounted_futures[12])->future, *((dfr_refcounted_future_p)refcounted_futures[13])->future, *((dfr_refcounted_future_p)refcounted_futures[14])->future, *((dfr_refcounted_future_p)refcounted_futures[15])->future, *((dfr_refcounted_future_p)refcounted_futures[16])->future, *((dfr_refcounted_future_p)refcounted_futures[17])->future)); break; case 19: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7, hpx::shared_future param8, hpx::shared_future param9, hpx::shared_future param10, hpx::shared_future param11, hpx::shared_future param12, hpx::shared_future param13, hpx::shared_future param14, hpx::shared_future param15, hpx::shared_future param16, hpx::shared_future param17, hpx::shared_future param18) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get(), param8.get(), param9.get(), param10.get(), param11.get(), param12.get(), param13.get(), param14.get(), param15.get(), param16.get(), param17.get(), param18.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future, *((dfr_refcounted_future_p)refcounted_futures[8])->future, *((dfr_refcounted_future_p)refcounted_futures[9])->future, *((dfr_refcounted_future_p)refcounted_futures[10])->future, *((dfr_refcounted_future_p)refcounted_futures[11])->future, *((dfr_refcounted_future_p)refcounted_futures[12])->future, *((dfr_refcounted_future_p)refcounted_futures[13])->future, *((dfr_refcounted_future_p)refcounted_futures[14])->future, *((dfr_refcounted_future_p)refcounted_futures[15])->future, *((dfr_refcounted_future_p)refcounted_futures[16])->future, *((dfr_refcounted_future_p)refcounted_futures[17])->future, *((dfr_refcounted_future_p)refcounted_futures[18])->future)); break; case 20: oodf = std::move(hpx::dataflow( [wfnname, param_sizes, param_types, output_sizes, output_types, gcc_target, ctx](hpx::shared_future param0, hpx::shared_future param1, hpx::shared_future param2, hpx::shared_future param3, hpx::shared_future param4, hpx::shared_future param5, hpx::shared_future param6, hpx::shared_future param7, hpx::shared_future param8, hpx::shared_future param9, hpx::shared_future param10, hpx::shared_future param11, hpx::shared_future param12, hpx::shared_future param13, hpx::shared_future param14, hpx::shared_future param15, hpx::shared_future param16, hpx::shared_future param17, hpx::shared_future param18, hpx::shared_future param19) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), param4.get(), param5.get(), param6.get(), param7.get(), param8.get(), param9.get(), param10.get(), param11.get(), param12.get(), param13.get(), param14.get(), param15.get(), param16.get(), param17.get(), param18.get(), param19.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types, ctx); return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, *((dfr_refcounted_future_p)refcounted_futures[2])->future, *((dfr_refcounted_future_p)refcounted_futures[3])->future, *((dfr_refcounted_future_p)refcounted_futures[4])->future, *((dfr_refcounted_future_p)refcounted_futures[5])->future, *((dfr_refcounted_future_p)refcounted_futures[6])->future, *((dfr_refcounted_future_p)refcounted_futures[7])->future, *((dfr_refcounted_future_p)refcounted_futures[8])->future, *((dfr_refcounted_future_p)refcounted_futures[9])->future, *((dfr_refcounted_future_p)refcounted_futures[10])->future, *((dfr_refcounted_future_p)refcounted_futures[11])->future, *((dfr_refcounted_future_p)refcounted_futures[12])->future, *((dfr_refcounted_future_p)refcounted_futures[13])->future, *((dfr_refcounted_future_p)refcounted_futures[14])->future, *((dfr_refcounted_future_p)refcounted_futures[15])->future, *((dfr_refcounted_future_p)refcounted_futures[16])->future, *((dfr_refcounted_future_p)refcounted_futures[17])->future, *((dfr_refcounted_future_p)refcounted_futures[18])->future, *((dfr_refcounted_future_p)refcounted_futures[19])->future)); break; default: HPX_THROW_EXCEPTION(hpx::no_success, "_dfr_create_async_task", "Error: number of task parameters not supported."); } switch (num_outputs) { case 1: *((void **)outputs[0]) = (void *)new dfr_refcounted_future_t( new hpx::shared_future(hpx::dataflow( [refcounted_futures]( hpx::future oodf_in) -> void * { void *ret = oodf_in.get().outputs[0]; for (auto rcf : refcounted_futures) _dfr_deallocate_future(rcf); return ret; }, oodf)), 1, output_types[0] == mlir::concretelang::dfr::_DFR_TASK_ARG_MEMREF); break; case 2: { hpx::future> &&ft = hpx::dataflow( [refcounted_futures]( hpx::future oodf_in) -> hpx::tuple { std::vector outputs = std::move(oodf_in.get().outputs); for (auto rcf : refcounted_futures) _dfr_deallocate_future(rcf); return hpx::make_tuple<>(outputs[0], outputs[1]); }, oodf); hpx::tuple, hpx::future> &&tf = hpx::split_future(std::move(ft)); *((void **)outputs[0]) = (void *)new dfr_refcounted_future_t( new hpx::shared_future(std::move(hpx::get<0>(tf))), 1, output_types[0] == mlir::concretelang::dfr::_DFR_TASK_ARG_MEMREF); *((void **)outputs[1]) = (void *)new dfr_refcounted_future_t( new hpx::shared_future(std::move(hpx::get<1>(tf))), 1, output_types[1] == mlir::concretelang::dfr::_DFR_TASK_ARG_MEMREF); break; } case 3: { hpx::future> &&ft = hpx::dataflow( [refcounted_futures]( hpx::future oodf_in) -> hpx::tuple { std::vector outputs = std::move(oodf_in.get().outputs); for (auto rcf : refcounted_futures) _dfr_deallocate_future(rcf); return hpx::make_tuple<>(outputs[0], outputs[1], outputs[2]); }, oodf); hpx::tuple, hpx::future, hpx::future> &&tf = hpx::split_future(std::move(ft)); *((void **)outputs[0]) = (void *)new dfr_refcounted_future_t( new hpx::shared_future(std::move(hpx::get<0>(tf))), 1, output_types[0] == mlir::concretelang::dfr::_DFR_TASK_ARG_MEMREF); *((void **)outputs[1]) = (void *)new dfr_refcounted_future_t( new hpx::shared_future(std::move(hpx::get<1>(tf))), 1, output_types[1] == mlir::concretelang::dfr::_DFR_TASK_ARG_MEMREF); *((void **)outputs[2]) = (void *)new dfr_refcounted_future_t( new hpx::shared_future(std::move(hpx::get<2>(tf))), 1, output_types[2] == mlir::concretelang::dfr::_DFR_TASK_ARG_MEMREF); break; } default: HPX_THROW_EXCEPTION(hpx::no_success, "_dfr_create_async_task", "Error: number of task outputs not supported."); } } /***************************/ /* JIT execution support. */ /***************************/ void _dfr_try_initialize(); namespace mlir { namespace concretelang { namespace dfr { namespace { static bool dfr_required_p = false; static bool is_jit_p = false; static bool is_root_node_p = true; static bool use_omp_p = false; } // namespace void _dfr_set_required(bool is_required) { mlir::concretelang::dfr::dfr_required_p = is_required; if (mlir::concretelang::dfr::dfr_required_p) { _dfr_try_initialize(); } } void _dfr_set_jit(bool is_jit) { mlir::concretelang::dfr::is_jit_p = is_jit; } void _dfr_set_use_omp(bool use_omp) { mlir::concretelang::dfr::use_omp_p = use_omp; } bool _dfr_is_jit() { return mlir::concretelang::dfr::is_jit_p; } bool _dfr_is_root_node() { return mlir::concretelang::dfr::is_root_node_p; } bool _dfr_use_omp() { return mlir::concretelang::dfr::use_omp_p; } bool _dfr_is_distributed() { return num_nodes > 1; } } // namespace dfr } // namespace concretelang } // namespace mlir void _dfr_register_work_function(wfnptr wfn) { mlir::concretelang::dfr::_dfr_node_level_work_function_registry ->getWorkFunctionName((void *)wfn); } /************************************/ /* Initialization & Finalization. */ /************************************/ namespace mlir { namespace concretelang { namespace dfr { namespace { static std::atomic init_guard = {0}; static uint64_t uninitialised = 0; static uint64_t active = 1; static uint64_t terminated = 2; } // namespace } // namespace dfr } // namespace concretelang } // namespace mlir static inline void _dfr_stop_impl() { if (mlir::concretelang::dfr::_dfr_is_root_node()) hpx::apply([]() { hpx::finalize(); }); hpx::stop(); if (!mlir::concretelang::dfr::_dfr_is_root_node()) exit(EXIT_SUCCESS); } static inline void _dfr_start_impl(int argc, char *argv[]) { BEGIN_TIME(&mlir::concretelang::dfr::init_timer); mlir::concretelang::dfr::dl_handle = dlopen(nullptr, RTLD_NOW); // If OpenMP is to be used, we need to force its initialization // before thread binding occurs. Otherwise OMP threads will be bound // to the core of the thread initializing the OMP runtime. if (mlir::concretelang::dfr::_dfr_use_omp()) { #pragma omp parallel shared(mlir::concretelang::dfr::use_omp_p) { #pragma omp critical mlir::concretelang::dfr::use_omp_p = true; } } if (argc == 0) { int nCores, nOMPThreads, nHPXThreads; std::string hpxThreadNum; std::vector parameters; parameters.push_back(const_cast("__dummy_dfr_HPX_program_name__")); hwloc_topology_t topology; hwloc_topology_init(&topology); hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_CORE, HWLOC_TYPE_FILTER_KEEP_ALL); hwloc_topology_load(topology); nCores = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE); if (nCores < 1) nCores = 1; // We do not directly handle this, but we should take into account // the choices made by the OpenMP runtime if we would be mixing // loop & dataflow parallelism. char *env = getenv("OMP_NUM_THREADS"); if (mlir::concretelang::dfr::_dfr_use_omp() && env != nullptr) nOMPThreads = strtoul(env, NULL, 10); else if (mlir::concretelang::dfr::_dfr_use_omp()) nOMPThreads = nCores; else nOMPThreads = 1; // Unless specified, we will consider that within each node loop // parallelism is the priority, so we would allocate either // ncores/OMP_NUM_THREADS or ncores-OMP_NUM_THREADS+1. Both make // sense depending on whether we have very regular computation or // not - the latter being more conservative in that we will // exploit all cores, at the risk of oversubscribing. Ideally the // distribution of hardware resources to the runtime systems // should be explicitly defined by the user. env = getenv("DFR_NUM_THREADS"); if (env != nullptr) { nHPXThreads = strtoul(env, NULL, 10); parameters.push_back(const_cast("--hpx:threads")); hpxThreadNum = std::to_string(nHPXThreads); parameters.push_back(const_cast(hpxThreadNum.c_str())); } else nHPXThreads = nCores + 1 - nOMPThreads; if (nHPXThreads < 1) nHPXThreads = 1; // If the user does not provide their own config file, one is by // default located at the root of the concrete-compiler directory. env = getenv("HPX_CONFIG_FILE"); // If no file is provided, try and check that the default is // available - otherwise use a basic default configuration. #ifdef HPX_DEFAULT_CONFIG_FILE if (env == nullptr) if (access(HPX_DEFAULT_CONFIG_FILE, F_OK) == 0) env = const_cast(HPX_DEFAULT_CONFIG_FILE); #endif if (env != nullptr) { parameters.push_back(const_cast("--hpx:config")); parameters.push_back(const_cast(env)); hpx::start(nullptr, parameters.size(), parameters.data()); } else { // Last resort configuration in case no config file could be // identified, provide some default values that make (some) // sense for homomorphic computations (stacks need to reflect // the size of ciphertexts rather than simple cleartext // scalars). if (std::find(parameters.begin(), parameters.end(), "--hpx:threads") == parameters.end()) { parameters.push_back(const_cast("--hpx:threads")); hpxThreadNum = std::to_string(nHPXThreads); parameters.push_back(const_cast(hpxThreadNum.c_str())); } parameters.push_back( const_cast("--hpx:ini=hpx.stacks.small_size=0x8000000")); parameters.push_back( const_cast("--hpx:ini=hpx.stacks.medium_size=0x10000000")); parameters.push_back( const_cast("--hpx:ini=hpx.stacks.large_size=0x20000000")); parameters.push_back( const_cast("--hpx:ini=hpx.stacks.huge_size=0x40000000")); hpx::start(nullptr, parameters.size(), parameters.data()); } } else { hpx::start(nullptr, argc, argv); } // Instantiate and initialise on each node mlir::concretelang::dfr::is_root_node_p = (hpx::find_here() == hpx::find_root_locality()); mlir::concretelang::dfr::num_nodes = hpx::get_num_localities().get(); new mlir::concretelang::dfr::WorkFunctionRegistry(); mlir::concretelang::dfr::_dfr_jit_phase_barrier = new hpx::lcos::barrier( "phase_barrier", mlir::concretelang::dfr::num_nodes, hpx::get_locality_id()); mlir::concretelang::dfr::_dfr_startup_barrier = new hpx::lcos::barrier( "startup_barrier", mlir::concretelang::dfr::num_nodes, hpx::get_locality_id()); if (mlir::concretelang::dfr::_dfr_is_root_node()) { // Create compute server components on each node - from the root // node only - and the corresponding compute client on the root // node. mlir::concretelang::dfr::gcc = hpx::new_( hpx::default_layout(hpx::find_all_localities()), mlir::concretelang::dfr::num_nodes) .get(); } END_TIME(&mlir::concretelang::dfr::init_timer, "Initialization"); } /* Start/stop functions to be called from within user code (or during JIT invocation). These serve to pause/resume the runtime scheduler and to clean up used resources. */ void _dfr_start(int64_t use_dfr_p, void *ctx) { BEGIN_TIME(&mlir::concretelang::dfr::whole_timer); if (use_dfr_p) { // The first invocation will initialise the runtime. As each call to // _dfr_start is matched with _dfr_stop, if this is not hte first, // we need to resume the HPX runtime. assert(mlir::concretelang::dfr::init_guard != mlir::concretelang::dfr::terminated && "DFR runtime: attempting to start runtime after it has been " "terminated"); uint64_t expected = mlir::concretelang::dfr::uninitialised; if (mlir::concretelang::dfr::init_guard.compare_exchange_strong( expected, mlir::concretelang::dfr::active)) _dfr_start_impl(0, nullptr); assert(mlir::concretelang::dfr::init_guard == mlir::concretelang::dfr::active && "DFR runtime failed to initialise"); // If this is not the root node in a non-JIT execution, then this // node should only run the scheduler for any incoming work until // termination is flagged. If this is JIT, we need to run the // cancelled function which registers the work functions. if (!mlir::concretelang::dfr::_dfr_is_root_node() && !mlir::concretelang::dfr::_dfr_is_jit()) _dfr_stop_impl(); } // If DFR is used and a runtime context is needed, and execution is // distributed, then broadcast from root to all compute nodes. if (use_dfr_p && (mlir::concretelang::dfr::num_nodes > 1) && (ctx || !mlir::concretelang::dfr::_dfr_is_root_node())) { BEGIN_TIME(&mlir::concretelang::dfr::broadcast_timer); new mlir::concretelang::dfr::RuntimeContextManager(); mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager ->setContext(ctx); // If this is not JIT, then the remote nodes never reach _dfr_stop, // so root should not instantiate this barrier. if (mlir::concretelang::dfr::_dfr_is_root_node() && mlir::concretelang::dfr::_dfr_is_jit()) mlir::concretelang::dfr::_dfr_startup_barrier->wait(); END_TIME(&mlir::concretelang::dfr::broadcast_timer, "Key broadcasting"); } BEGIN_TIME(&mlir::concretelang::dfr::compute_timer); } // This function cannot be used to terminate the runtime as it is // non-decidable if another computation phase will follow. Instead the // _dfr_terminate function provides this facility and is normally // called on exit from "main" when not using the main wrapper library. void _dfr_stop(int64_t use_dfr_p) { if (use_dfr_p) { if (mlir::concretelang::dfr::num_nodes > 1) { // Non-root nodes synchronize here with the root to mark the point // where the root is free to send work out (only needed in JIT). if (!mlir::concretelang::dfr::_dfr_is_root_node()) mlir::concretelang::dfr::_dfr_startup_barrier->wait(); // The barrier is only needed to synchronize the different // computation phases when the compute nodes need to generate and // register new work functions in each phase. // TODO: this barrier may be removed based on how work function // registration is handled - but it is unlikely to result in much // gain as the root node would be waiting for the end of computation // on all remote nodes before reaching here anyway (dataflow // dependences). if (mlir::concretelang::dfr::_dfr_is_jit()) { mlir::concretelang::dfr::_dfr_jit_phase_barrier->wait(); } mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager ->clearContext(); } } END_TIME(&mlir::concretelang::dfr::compute_timer, "Compute"); END_TIME(&mlir::concretelang::dfr::whole_timer, "Total execution"); } void _dfr_try_initialize() { // Initialize and immediately suspend the HPX runtime if not yet done. uint64_t expected = mlir::concretelang::dfr::uninitialised; if (mlir::concretelang::dfr::init_guard.compare_exchange_strong( expected, mlir::concretelang::dfr::active)) { _dfr_start_impl(0, nullptr); } assert(mlir::concretelang::dfr::init_guard == mlir::concretelang::dfr::active && "DFR runtime failed to initialise"); } void _dfr_terminate() { uint64_t expected = mlir::concretelang::dfr::active; if (mlir::concretelang::dfr::init_guard.compare_exchange_strong( expected, mlir::concretelang::dfr::terminated)) _dfr_stop_impl(); assert((mlir::concretelang::dfr::init_guard == mlir::concretelang::dfr::terminated || mlir::concretelang::dfr::init_guard == mlir::concretelang::dfr::uninitialised) && "DFR runtime failed to terminate"); } /*******************/ /* Main wrapper. */ /*******************/ extern "C" { extern int main(int argc, char *argv[]) __attribute__((weak)); extern int __real_main(int argc, char *argv[]) __attribute__((weak)); int __wrap_main(int argc, char *argv[]) { int r; _dfr_try_initialize(); // Run the actual main function. Within there should be a call to // _dfr_start to resume execution of the HPX scheduler if needed. r = __real_main(argc, argv); _dfr_terminate(); return r; } } /**********************/ /* Debug interface. */ /**********************/ size_t _dfr_debug_get_node_id() { return hpx::get_locality_id(); } size_t _dfr_debug_get_worker_id() { return hpx::get_worker_thread_num(); } void _dfr_debug_print_task(const char *name, size_t inputs, size_t outputs) { // clang-format off hpx::cout << "Task \"" << name << "\t\"" << " [" << inputs << " inputs, " << outputs << " outputs]" << " Executing on Node/Worker: " << _dfr_debug_get_node_id() << " / " << _dfr_debug_get_worker_id() << "\n" << std::flush; // clang-format on } /// Generic utility function for printing debug info void _dfr_print_debug(size_t val) { hpx::cout << "_dfr_print_debug : " << val << "\n" << std::flush; } #else // CONCRETELANG_DATAFLOW_EXECUTION_ENABLED #include "concretelang/Runtime/DFRuntime.hpp" #include "concretelang/Runtime/time_util.h" namespace mlir { namespace concretelang { namespace dfr { namespace { static bool is_jit_p = false; static bool use_omp_p = false; static size_t num_nodes = 1; #if CONCRETELANG_TIMING_ENABLED static struct timespec compute_timer; #endif } // namespace void _dfr_set_required(bool is_required) {} void _dfr_set_jit(bool p) { is_jit_p = p; } void _dfr_set_use_omp(bool use_omp) { use_omp_p = use_omp; } bool _dfr_is_jit() { return is_jit_p; } bool _dfr_is_root_node() { return true; } bool _dfr_use_omp() { return use_omp_p; } bool _dfr_is_distributed() { return num_nodes > 1; } } // namespace dfr } // namespace concretelang } // namespace mlir void _dfr_start(int64_t use_dfr_p, void *ctx) { BEGIN_TIME(&mlir::concretelang::dfr::compute_timer); } void _dfr_stop(int64_t use_dfr_p) { END_TIME(&mlir::concretelang::dfr::compute_timer, "Compute"); } void _dfr_terminate() {} #endif