From 1bb3d04059d2e7737545489e3446117ff72d2902 Mon Sep 17 00:00:00 2001 From: Antoniu Pop Date: Fri, 22 Jul 2022 15:49:54 +0100 Subject: [PATCH] fix(dfr): broadcast evaluation keys early to avoid locking in HPX helper threads. --- .../distributed_generic_task_server.hpp | 55 +- .../concretelang/Runtime/key_manager.hpp | 224 ++----- .../concretelang/Runtime/runtime_api.h | 1 + .../RT/Analysis/LowerDataflowTasksToRT.cpp | 53 +- compiler/lib/Runtime/DFRuntime.cpp | 595 ++++++++---------- 5 files changed, 377 insertions(+), 551 deletions(-) diff --git a/compiler/include/concretelang/Runtime/distributed_generic_task_server.hpp b/compiler/include/concretelang/Runtime/distributed_generic_task_server.hpp index 394b45858..31fd60b0a 100644 --- a/compiler/include/concretelang/Runtime/distributed_generic_task_server.hpp +++ b/compiler/include/concretelang/Runtime/distributed_generic_task_server.hpp @@ -69,21 +69,19 @@ struct OpaqueInputData { std::vector _param_sizes, std::vector _param_types, std::vector _output_sizes, - std::vector _output_types, bool _alloc_p = false) + std::vector _output_types) : wfn_name(_wfn_name), params(std::move(_params)), param_sizes(std::move(_param_sizes)), param_types(std::move(_param_types)), output_sizes(std::move(_output_sizes)), - output_types(std::move(_output_types)), alloc_p(_alloc_p), - source_locality(hpx::find_here()), ksk_id(0), bsk_id(0) {} + output_types(std::move(_output_types)), ksk_id(0), bsk_id(0) {} OpaqueInputData(const OpaqueInputData &oid) : wfn_name(std::move(oid.wfn_name)), params(std::move(oid.params)), param_sizes(std::move(oid.param_sizes)), param_types(std::move(oid.param_types)), output_sizes(std::move(oid.output_sizes)), - output_types(std::move(oid.output_types)), alloc_p(oid.alloc_p), - source_locality(oid.source_locality), ksk_id(oid.ksk_id), + output_types(std::move(oid.output_types)), ksk_id(oid.ksk_id), bsk_id(oid.bsk_id) {} friend class hpx::serialization::access; @@ -91,7 +89,6 @@ struct OpaqueInputData { ar >> wfn_name; ar >> param_sizes >> param_types; ar >> output_sizes >> output_types; - ar >> source_locality; for (size_t p = 0; p < param_sizes.size(); ++p) { char *param; _dfr_checked_aligned_alloc((void **)¶m, 64, param_sizes[p]); @@ -118,15 +115,13 @@ struct OpaqueInputData { static_cast *>(params[p])->data = data; } break; case _DFR_TASK_ARG_CONTEXT: { - ar >> bsk_id >> ksk_id; - + // The copied pointer is meaningless - TODO: if the context + // can change dynamically (e.g., different evaluation keys) + // then this needs updating by passing key ids and retrieving + // adequate keys for the context. delete ((char *)params[p]); - // TODO: this might be relaxed with newer versions of HPX. - // Do not set the context here as remote operations are - // unstable when initiated within a HPX helper thread. params[p] = - (void *) - _dfr_node_level_runtime_context_manager->getContextAddress(); + (void *)_dfr_node_level_runtime_context_manager->getContext(); } break; case _DFR_TASK_ARG_UNRANKED_MEMREF: default: @@ -134,14 +129,12 @@ struct OpaqueInputData { "Error: invalid task argument type."); } } - alloc_p = true; } template void save(Archive &ar, const unsigned int version) const { ar << wfn_name; ar << param_sizes << param_types; ar << output_sizes << output_types; - ar << source_locality; for (size_t p = 0; p < params.size(); ++p) { // Save the first level of the data structure - if the parameter // is a tensor/memref, there is a second level. @@ -161,18 +154,8 @@ struct OpaqueInputData { mref.data + mref.offset * elementSize, size * elementSize); } break; case _DFR_TASK_ARG_CONTEXT: { - mlir::concretelang::RuntimeContext *context = - *static_cast(params[p]); - LweKeyswitchKey_u64 *ksk = get_keyswitch_key_u64(context); - LweBootstrapKey_u64 *bsk = get_bootstrap_key_u64(context); - - assert(bsk != nullptr && ksk != nullptr && "Missing context keys"); - std::cout << "Registering Key ids " << (uint64_t)ksk << " " - << (uint64_t)bsk << "\n" - << std::flush; - _dfr_register_bsk(bsk, (uint64_t)bsk); - _dfr_register_ksk(ksk, (uint64_t)ksk); - ar << (uint64_t)bsk << (uint64_t)ksk; + // Nothing to do now - TODO: pass key ids if these are not + // unique for a computation. } break; case _DFR_TASK_ARG_UNRANKED_MEMREF: default: @@ -189,8 +172,6 @@ struct OpaqueInputData { std::vector param_types; std::vector output_sizes; std::vector output_types; - bool alloc_p = false; - hpx::naming::id_type source_locality; uint64_t ksk_id; uint64_t bsk_id; }; @@ -199,13 +180,13 @@ struct OpaqueOutputData { OpaqueOutputData() = default; OpaqueOutputData(std::vector outputs, std::vector output_sizes, - std::vector output_types, bool alloc_p = false) + std::vector output_types) : outputs(std::move(outputs)), output_sizes(std::move(output_sizes)), - output_types(std::move(output_types)), alloc_p(alloc_p) {} + output_types(std::move(output_types)) {} OpaqueOutputData(const OpaqueOutputData &ood) : outputs(std::move(ood.outputs)), output_sizes(std::move(ood.output_sizes)), - output_types(std::move(ood.output_types)), alloc_p(ood.alloc_p) {} + output_types(std::move(ood.output_types)) {} friend class hpx::serialization::access; template void load(Archive &ar, const unsigned int version) { @@ -246,7 +227,6 @@ struct OpaqueOutputData { "Error: invalid task argument type."); } } - alloc_p = true; } template void save(Archive &ar, const unsigned int version) const { @@ -283,7 +263,6 @@ struct OpaqueOutputData { std::vector outputs; std::vector output_sizes; std::vector output_types; - bool alloc_p = false; }; struct GenericComputeServer : component_base { @@ -295,12 +274,6 @@ struct GenericComputeServer : component_base { inputs.wfn_name); std::vector outputs; - if (inputs.source_locality != hpx::find_here() && - (inputs.ksk_id || inputs.bsk_id)) { - _dfr_node_level_runtime_context_manager->getContext( - inputs.ksk_id, inputs.bsk_id, inputs.source_locality); - } - _dfr_debug_print_task(inputs.wfn_name.c_str(), inputs.params.size(), inputs.output_sizes.size()); hpx::cout << std::flush; @@ -735,7 +708,7 @@ struct GenericComputeServer : component_base { } return OpaqueOutputData(std::move(outputs), std::move(inputs.output_sizes), - std::move(inputs.output_types), inputs.alloc_p); + std::move(inputs.output_types)); } HPX_DEFINE_COMPONENT_ACTION(GenericComputeServer, execute_task); diff --git a/compiler/include/concretelang/Runtime/key_manager.hpp b/compiler/include/concretelang/Runtime/key_manager.hpp index 660b60fd9..d7b73a772 100644 --- a/compiler/include/concretelang/Runtime/key_manager.hpp +++ b/compiler/include/concretelang/Runtime/key_manager.hpp @@ -29,14 +29,9 @@ template struct KeyManager; struct RuntimeContextManager; namespace { static void *dl_handle; -static KeyManager *_dfr_node_level_bsk_manager; -static KeyManager *_dfr_node_level_ksk_manager; static RuntimeContextManager *_dfr_node_level_runtime_context_manager; } // namespace -void _dfr_register_bsk(LweBootstrapKey_u64 *key, uint64_t key_id); -void _dfr_register_ksk(LweKeyswitchKey_u64 *key, uint64_t key_id); - template struct KeyWrapper { LweKeyType *key; @@ -44,6 +39,10 @@ template struct KeyWrapper { KeyWrapper(LweKeyType *key) : key(key) {} KeyWrapper(KeyWrapper &&moved) noexcept : key(moved.key) {} KeyWrapper(const KeyWrapper &kw) : key(kw.key) {} + KeyWrapper &operator=(const KeyWrapper &rhs) { + this->key = rhs.key; + return *this; + } friend class hpx::serialization::access; template void save(Archive &ar, const unsigned int version) const; @@ -51,6 +50,12 @@ template struct KeyWrapper { HPX_SERIALIZATION_SPLIT_MEMBER() }; +template +bool operator==(const KeyWrapper &lhs, + const KeyWrapper &rhs) { + return lhs.key == rhs.key; +} + template <> template void KeyWrapper::save(Archive &ar, @@ -91,137 +96,6 @@ void KeyWrapper::load(Archive &ar, key = deserialize_lwe_keyswitching_key_u64(buffer); } -template struct KeyManager { - KeyManager() {} - LweKeyType *get_key(hpx::naming::id_type loc, const uint64_t key_id); - - KeyWrapper fetch_key(const uint64_t key_id) { - std::lock_guard guard(keystore_guard); - - auto keyit = keystore.find(key_id); - if (keyit != keystore.end()) - return keyit->second; - // If this node does not contain this key, this is an error - // (location was supplied as source for this key). - HPX_THROW_EXCEPTION( - hpx::no_success, "fetch_key", - "Error: could not find key to be fetched on source location."); - } - - void register_key(LweKeyType *key, uint64_t key_id) { - std::lock_guard guard(keystore_guard); - auto keyit = keystore.find(key_id); - if (keyit == keystore.end()) { - keyit = keystore - .insert(std::pair>( - key_id, KeyWrapper(key))) - .first; - if (keyit == keystore.end()) { - HPX_THROW_EXCEPTION(hpx::no_success, "_dfr_register_key", - "Error: could not register new key."); - } - } - } - - void clear_keys() { - std::lock_guard guard(keystore_guard); - keystore.clear(); - } - -private: - std::mutex keystore_guard; - std::map> keystore; -}; - -KeyWrapper _dfr_fetch_bsk(uint64_t key_id) { - return _dfr_node_level_bsk_manager->fetch_key(key_id); -} - -KeyWrapper _dfr_fetch_ksk(uint64_t key_id) { - return _dfr_node_level_ksk_manager->fetch_key(key_id); -} - -} // namespace dfr -} // namespace concretelang -} // namespace mlir - -HPX_PLAIN_ACTION(mlir::concretelang::dfr::_dfr_fetch_ksk, _dfr_fetch_ksk_action) -HPX_PLAIN_ACTION(mlir::concretelang::dfr::_dfr_fetch_bsk, _dfr_fetch_bsk_action) - -namespace mlir { -namespace concretelang { -namespace dfr { - -template <> KeyManager::KeyManager() { - _dfr_node_level_bsk_manager = this; -} - -template <> -LweBootstrapKey_u64 * -KeyManager::get_key(hpx::naming::id_type loc, - const uint64_t key_id) { - keystore_guard.lock(); - auto keyit = keystore.find(key_id); - keystore_guard.unlock(); - - if (keyit == keystore.end()) { - _dfr_fetch_bsk_action fetch; - KeyWrapper &&bskw = fetch(loc, key_id); - if (bskw.key == nullptr) { - HPX_THROW_EXCEPTION(hpx::no_success, "_dfr_get_key", - "Error: Bootstrap key not found on root node."); - } else { - _dfr_register_bsk(bskw.key, key_id); - } - return bskw.key; - } - return keyit->second.key; -} - -template <> KeyManager::KeyManager() { - _dfr_node_level_ksk_manager = this; -} - -template <> -LweKeyswitchKey_u64 * -KeyManager::get_key(hpx::naming::id_type loc, - const uint64_t key_id) { - keystore_guard.lock(); - auto keyit = keystore.find(key_id); - keystore_guard.unlock(); - - if (keyit == keystore.end()) { - _dfr_fetch_ksk_action fetch; - KeyWrapper &&kskw = fetch(loc, key_id); - if (kskw.key == nullptr) { - HPX_THROW_EXCEPTION(hpx::no_success, "_dfr_get_key", - "Error: Keyswitching key not found on root node."); - } else { - _dfr_register_ksk(kskw.key, key_id); - } - return kskw.key; - } - return keyit->second.key; -} - -/************************/ -/* Key management API. */ -/************************/ - -void _dfr_register_bsk(LweBootstrapKey_u64 *key, uint64_t key_id) { - _dfr_node_level_bsk_manager->register_key(key, key_id); -} -void _dfr_register_ksk(LweKeyswitchKey_u64 *key, uint64_t key_id) { - _dfr_node_level_ksk_manager->register_key(key, key_id); -} - -LweBootstrapKey_u64 *_dfr_get_bsk(hpx::naming::id_type loc, uint64_t key_id) { - return _dfr_node_level_bsk_manager->get_key(loc, key_id); -} -LweKeyswitchKey_u64 *_dfr_get_ksk(hpx::naming::id_type loc, uint64_t key_id) { - return _dfr_node_level_ksk_manager->get_key(loc, key_id); -} - /************************/ /* Context management. */ /************************/ @@ -230,58 +104,52 @@ struct RuntimeContextManager { // TODO: this is only ok so long as we don't change keys. Once we // use multiple keys, should have a map. RuntimeContext *context; - std::mutex context_guard; - uint64_t ksk_id; - uint64_t bsk_id; RuntimeContextManager() { - ksk_id = 0; - bsk_id = 0; context = nullptr; _dfr_node_level_runtime_context_manager = this; } - RuntimeContext *getContext(uint64_t ksk, uint64_t bsk, - hpx::naming::id_type source_locality) { - std::cout << "GetContext on node " << hpx::get_locality_id() - << " with context " << context << " " << bsk_id << " " << ksk_id - << "\n" - << std::flush; - if (context != nullptr) { - std::cout << "simil " << ksk_id << " " << ksk << " " << bsk_id << " " - << bsk << "\n" - << std::flush; - assert(ksk == ksk_id && bsk == bsk_id && - "Context manager can only used with single keys for now."); + void setContext(void *ctx) { + assert(context == nullptr && + "Only one RuntimeContext can be used at a time."); + + // Root node broadcasts the evaluation keys and each remote + // instantiates a local RuntimeContext. + if (_dfr_is_root_node()) { + RuntimeContext *context = (RuntimeContext *)ctx; + LweKeyswitchKey_u64 *ksk = get_keyswitch_key_u64(context); + LweBootstrapKey_u64 *bsk = get_bootstrap_key_u64(context); + + auto kskFut = hpx::collectives::broadcast_to( + "ksk_keystore", KeyWrapper(ksk)); + auto bskFut = hpx::collectives::broadcast_to( + "bsk_keystore", KeyWrapper(bsk)); } else { - assert(ksk_id == 0 && bsk_id == 0 && - "Context empty but context manager has key ids."); - LweKeyswitchKey_u64 *keySwitchKey = _dfr_get_ksk(source_locality, ksk); - LweBootstrapKey_u64 *bootstrapKey = _dfr_get_bsk(source_locality, bsk); - std::lock_guard guard(context_guard); - if (context == nullptr) { - auto ctx = new RuntimeContext(); - ctx->evaluationKeys = ::concretelang::clientlib::EvaluationKeys( - std::shared_ptr<::concretelang::clientlib::LweKeyswitchKey>( - new ::concretelang::clientlib::LweKeyswitchKey(keySwitchKey)), - std::shared_ptr<::concretelang::clientlib::LweBootstrapKey>( - new ::concretelang::clientlib::LweBootstrapKey(bootstrapKey))); - ksk_id = ksk; - bsk_id = bsk; - context = ctx; - std::cout << "Fetching Key ids " << ksk_id << " " << bsk_id << "\n" - << std::flush; - } else { - std::cout << " GOT context after LOCK on node " - << hpx::get_locality_id() << " with context " << context - << " " << bsk_id << " " << ksk_id << "\n" - << std::flush; - } + auto kskFut = + hpx::collectives::broadcast_from>( + "ksk_keystore"); + auto bskFut = + hpx::collectives::broadcast_from>( + "bsk_keystore"); + + context = new mlir::concretelang::RuntimeContext(); + context->evaluationKeys = ::concretelang::clientlib::EvaluationKeys( + std::shared_ptr<::concretelang::clientlib::LweKeyswitchKey>( + new ::concretelang::clientlib::LweKeyswitchKey(kskFut.get().key)), + std::shared_ptr<::concretelang::clientlib::LweBootstrapKey>( + new ::concretelang::clientlib::LweBootstrapKey( + bskFut.get().key))); } - return context; } - RuntimeContext **getContextAddress() { return &context; } + RuntimeContext **getContext() { return &context; } + + void clearContext() { + if (context != nullptr) + delete context; + context = nullptr; + } }; } // namespace dfr diff --git a/compiler/include/concretelang/Runtime/runtime_api.h b/compiler/include/concretelang/Runtime/runtime_api.h index 7e19bb2d4..c8ccc2c63 100644 --- a/compiler/include/concretelang/Runtime/runtime_api.h +++ b/compiler/include/concretelang/Runtime/runtime_api.h @@ -26,6 +26,7 @@ void _dfr_deallocate_future(void *); void _dfr_deallocate_future_data(void *); /* Initialisation & termination. */ +void _dfr_start_c(void *); void _dfr_start(); void _dfr_stop(); diff --git a/compiler/lib/Dialect/RT/Analysis/LowerDataflowTasksToRT.cpp b/compiler/lib/Dialect/RT/Analysis/LowerDataflowTasksToRT.cpp index e47951cd3..e9071be0f 100644 --- a/compiler/lib/Dialect/RT/Analysis/LowerDataflowTasksToRT.cpp +++ b/compiler/lib/Dialect/RT/Analysis/LowerDataflowTasksToRT.cpp @@ -5,6 +5,7 @@ #include +#include #include #include #include @@ -422,6 +423,17 @@ struct LowerDataflowTasksPass }); for (auto entryPoint : entryPoints) { + // Check if this entry point uses a context - do this before we + // remove arguments in remote nodes + int ctxIndex = -1; + for (auto arg : llvm::enumerate(entryPoint.getArguments())) + if (arg.value() + .getType() + .isa()) { + ctxIndex = arg.index(); + break; + } + // If this is a JIT invocation and we're not on the root node, // we do not need to do any computation, only register all work // functions with the runtime system @@ -455,20 +467,37 @@ struct LowerDataflowTasksPass if (!workFunctions.empty()) { OpBuilder builder(entryPoint.getBody()); builder.setInsertionPointToStart(&entryPoint.getBody().front()); - auto dfrStartFunOp = mlir::LLVM::lookupOrCreateFn( - module, "_dfr_start", {}, - LLVM::LLVMVoidType::get(entryPoint->getContext())); - builder.create(entryPoint.getLoc(), dfrStartFunOp, - mlir::ValueRange(), - ArrayRef()); + if (ctxIndex >= 0) { + auto startFunTy = + (dfr::_dfr_is_root_node()) + ? mlir::FunctionType::get( + entryPoint->getContext(), + {entryPoint.getArgument(ctxIndex).getType()}, {}) + : mlir::FunctionType::get(entryPoint->getContext(), {}, {}); + (void)insertForwardDeclaration(entryPoint, builder, "_dfr_start_c", + startFunTy); + builder.create( + entryPoint.getLoc(), "_dfr_start_c", mlir::TypeRange(), + (dfr::_dfr_is_root_node()) ? entryPoint.getArgument(ctxIndex) + : mlir::ValueRange()); + } else { + auto startFunTy = + mlir::FunctionType::get(entryPoint->getContext(), {}, {}); + (void)insertForwardDeclaration(entryPoint, builder, "_dfr_start", + startFunTy); + builder.create(entryPoint.getLoc(), "_dfr_start", + mlir::TypeRange(), + mlir::ValueRange()); + } builder.setInsertionPoint(entryPoint.getBody().back().getTerminator()); - auto dfrStopFunOp = mlir::LLVM::lookupOrCreateFn( - module, "_dfr_stop", {}, - LLVM::LLVMVoidType::get(entryPoint->getContext())); - builder.create(entryPoint.getLoc(), dfrStopFunOp, - mlir::ValueRange(), - ArrayRef()); + auto stopFunTy = + mlir::FunctionType::get(entryPoint->getContext(), {}, {}); + (void)insertForwardDeclaration(entryPoint, builder, "_dfr_stop", + stopFunTy); + builder.create(entryPoint.getLoc(), "_dfr_stop", + mlir::TypeRange(), + mlir::ValueRange()); } } } diff --git a/compiler/lib/Runtime/DFRuntime.cpp b/compiler/lib/Runtime/DFRuntime.cpp index 0a22d6696..23d123c85 100644 --- a/compiler/lib/Runtime/DFRuntime.cpp +++ b/compiler/lib/Runtime/DFRuntime.cpp @@ -28,8 +28,8 @@ namespace concretelang { namespace dfr { namespace { static std::vector gcc; -static hpx::lcos::barrier *_dfr_jit_workfunction_registration_barrier; static hpx::lcos::barrier *_dfr_jit_phase_barrier; +static hpx::lcos::barrier *_dfr_startup_barrier; static size_t num_nodes = 0; } // namespace } // namespace dfr @@ -93,7 +93,6 @@ static inline size_t _dfr_find_next_execution_locality() { /// the returns. void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, ...) { - // std::vector params; std::vector refcounted_futures; std::vector param_sizes; std::vector param_types; @@ -133,50 +132,47 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, // satisfied, which generates a future on a tuple of outputs, which // is then further split into a tuple of futures and provide // individual synchronization for each return independently. + mlir::concretelang::dfr::GenericComputeClient *gcc_target = + &mlir::concretelang::dfr::gcc[_dfr_find_next_execution_locality()]; switch (num_params) { case 0: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, output_types]() + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target]() -> hpx::future { std::vector params = {}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); })); break; case 1: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0) -> hpx::future { std::vector params = {param0.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future)); break; case 2: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1) -> hpx::future { std::vector params = {param0.get(), param1.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future)); @@ -184,19 +180,17 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 3: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2) -> hpx::future { std::vector params = {param0.get(), param1.get(), param2.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -205,20 +199,18 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 4: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3) -> hpx::future { std::vector params = {param0.get(), param1.get(), param2.get(), param3.get()}; mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -228,12 +220,12 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 5: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4) -> hpx::future { std::vector params = {param0.get(), param1.get(), param2.get(), param3.get(), @@ -241,9 +233,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -254,13 +244,13 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 6: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5) -> hpx::future { std::vector params = {param0.get(), param1.get(), param2.get(), param3.get(), @@ -268,9 +258,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -282,14 +270,14 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 7: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -297,9 +285,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -312,15 +298,15 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 8: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -328,9 +314,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -344,16 +328,16 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 9: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7, - hpx::shared_future param8) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7, + hpx::shared_future param8) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), @@ -362,9 +346,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -379,17 +361,17 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 10: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7, - hpx::shared_future param8, - hpx::shared_future param9) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7, + hpx::shared_future param8, + hpx::shared_future param9) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -398,9 +380,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -416,18 +396,18 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 11: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7, - hpx::shared_future param8, - hpx::shared_future param9, - hpx::shared_future param10) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7, + hpx::shared_future param8, + hpx::shared_future param9, + hpx::shared_future param10) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -436,9 +416,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -455,19 +433,19 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 12: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7, - hpx::shared_future param8, - hpx::shared_future param9, - hpx::shared_future param10, - hpx::shared_future param11) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7, + hpx::shared_future param8, + hpx::shared_future param9, + hpx::shared_future param10, + hpx::shared_future param11) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -476,9 +454,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -496,20 +472,20 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 13: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7, - hpx::shared_future param8, - hpx::shared_future param9, - hpx::shared_future param10, - hpx::shared_future param11, - hpx::shared_future param12) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7, + hpx::shared_future param8, + hpx::shared_future param9, + hpx::shared_future param10, + hpx::shared_future param11, + hpx::shared_future param12) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -519,9 +495,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -540,21 +514,21 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 14: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7, - hpx::shared_future param8, - hpx::shared_future param9, - hpx::shared_future param10, - hpx::shared_future param11, - hpx::shared_future param12, - hpx::shared_future param13) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7, + hpx::shared_future param8, + hpx::shared_future param9, + hpx::shared_future param10, + hpx::shared_future param11, + hpx::shared_future param12, + hpx::shared_future param13) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -564,9 +538,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -586,22 +558,22 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 15: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7, - hpx::shared_future param8, - hpx::shared_future param9, - hpx::shared_future param10, - hpx::shared_future param11, - hpx::shared_future param12, - hpx::shared_future param13, - hpx::shared_future param14) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7, + hpx::shared_future param8, + hpx::shared_future param9, + hpx::shared_future param10, + hpx::shared_future param11, + hpx::shared_future param12, + hpx::shared_future param13, + hpx::shared_future param14) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -611,9 +583,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -634,23 +604,23 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 16: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7, - hpx::shared_future param8, - hpx::shared_future param9, - hpx::shared_future param10, - hpx::shared_future param11, - hpx::shared_future param12, - hpx::shared_future param13, - hpx::shared_future param14, - hpx::shared_future param15) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7, + hpx::shared_future param8, + hpx::shared_future param9, + hpx::shared_future param10, + hpx::shared_future param11, + hpx::shared_future param12, + hpx::shared_future param13, + hpx::shared_future param14, + hpx::shared_future param15) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -660,9 +630,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -684,24 +652,24 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 17: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7, - hpx::shared_future param8, - hpx::shared_future param9, - hpx::shared_future param10, - hpx::shared_future param11, - hpx::shared_future param12, - hpx::shared_future param13, - hpx::shared_future param14, - hpx::shared_future param15, - hpx::shared_future param16) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7, + hpx::shared_future param8, + hpx::shared_future param9, + hpx::shared_future param10, + hpx::shared_future param11, + hpx::shared_future param12, + hpx::shared_future param13, + hpx::shared_future param14, + hpx::shared_future param15, + hpx::shared_future param16) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -712,9 +680,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -737,25 +703,25 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 18: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7, - hpx::shared_future param8, - hpx::shared_future param9, - hpx::shared_future param10, - hpx::shared_future param11, - hpx::shared_future param12, - hpx::shared_future param13, - hpx::shared_future param14, - hpx::shared_future param15, - hpx::shared_future param16, - hpx::shared_future param17) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7, + hpx::shared_future param8, + hpx::shared_future param9, + hpx::shared_future param10, + hpx::shared_future param11, + hpx::shared_future param12, + hpx::shared_future param13, + hpx::shared_future param14, + hpx::shared_future param15, + hpx::shared_future param16, + hpx::shared_future param17) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -766,9 +732,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -792,26 +756,26 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 19: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7, - hpx::shared_future param8, - hpx::shared_future param9, - hpx::shared_future param10, - hpx::shared_future param11, - hpx::shared_future param12, - hpx::shared_future param13, - hpx::shared_future param14, - hpx::shared_future param15, - hpx::shared_future param16, - hpx::shared_future param17, - hpx::shared_future param18) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7, + hpx::shared_future param8, + hpx::shared_future param9, + hpx::shared_future param10, + hpx::shared_future param11, + hpx::shared_future param12, + hpx::shared_future param13, + hpx::shared_future param14, + hpx::shared_future param15, + hpx::shared_future param16, + hpx::shared_future param17, + hpx::shared_future param18) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -822,9 +786,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -849,27 +811,27 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, case 20: oodf = std::move(hpx::dataflow( - [wfnname, param_sizes, param_types, output_sizes, - output_types](hpx::shared_future param0, - hpx::shared_future param1, - hpx::shared_future param2, - hpx::shared_future param3, - hpx::shared_future param4, - hpx::shared_future param5, - hpx::shared_future param6, - hpx::shared_future param7, - hpx::shared_future param8, - hpx::shared_future param9, - hpx::shared_future param10, - hpx::shared_future param11, - hpx::shared_future param12, - hpx::shared_future param13, - hpx::shared_future param14, - hpx::shared_future param15, - hpx::shared_future param16, - hpx::shared_future param17, - hpx::shared_future param18, - hpx::shared_future param19) + [wfnname, param_sizes, param_types, output_sizes, output_types, + gcc_target](hpx::shared_future param0, + hpx::shared_future param1, + hpx::shared_future param2, + hpx::shared_future param3, + hpx::shared_future param4, + hpx::shared_future param5, + hpx::shared_future param6, + hpx::shared_future param7, + hpx::shared_future param8, + hpx::shared_future param9, + hpx::shared_future param10, + hpx::shared_future param11, + hpx::shared_future param12, + hpx::shared_future param13, + hpx::shared_future param14, + hpx::shared_future param15, + hpx::shared_future param16, + hpx::shared_future param17, + hpx::shared_future param18, + hpx::shared_future param19) -> hpx::future { std::vector params = { param0.get(), param1.get(), param2.get(), param3.get(), @@ -880,9 +842,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs, mlir::concretelang::dfr::OpaqueInputData oid( wfnname, params, param_sizes, param_types, output_sizes, output_types); - return mlir::concretelang::dfr::gcc - [_dfr_find_next_execution_locality()] - .execute_task(oid); + return gcc_target->execute_task(oid); }, *((dfr_refcounted_future_p)refcounted_futures[0])->future, *((dfr_refcounted_future_p)refcounted_futures[1])->future, @@ -1146,17 +1106,13 @@ static inline void _dfr_start_impl(int argc, char *argv[]) { (hpx::find_here() == hpx::find_root_locality()); mlir::concretelang::dfr::num_nodes = hpx::get_num_localities().get(); - new mlir::concretelang::dfr::KeyManager(); - new mlir::concretelang::dfr::KeyManager(); - new mlir::concretelang::dfr::RuntimeContextManager(); new mlir::concretelang::dfr::WorkFunctionRegistry(); - mlir::concretelang::dfr::_dfr_jit_workfunction_registration_barrier = - new hpx::lcos::barrier("wait_register_remote_work_functions", - mlir::concretelang::dfr::num_nodes, - hpx::get_locality_id()); mlir::concretelang::dfr::_dfr_jit_phase_barrier = new hpx::lcos::barrier( "phase_barrier", mlir::concretelang::dfr::num_nodes, hpx::get_locality_id()); + mlir::concretelang::dfr::_dfr_startup_barrier = new hpx::lcos::barrier( + "startup_barrier", mlir::concretelang::dfr::num_nodes, + hpx::get_locality_id()); if (mlir::concretelang::dfr::_dfr_is_root_node()) { // Create compute server components on each node - from the root @@ -1197,14 +1153,21 @@ void _dfr_start() { if (!mlir::concretelang::dfr::_dfr_is_root_node() && !mlir::concretelang::dfr::_dfr_is_jit()) _dfr_stop_impl(); +} - // TODO: conditional -- If this is the root node, and this is JIT - // execution, we need to wait for the compute nodes to compile and - // register work functions +// Startup entry point when a RuntimeContext is used +void _dfr_start_c(void *ctx) { + _dfr_start(); + + new mlir::concretelang::dfr::RuntimeContextManager(); + mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager->setContext( + ctx); + + // If this is not JIT, then the remote nodes never reach _dfr_stop, + // so root should not instantiate this barrier. if (mlir::concretelang::dfr::_dfr_is_root_node() && - mlir::concretelang::dfr::_dfr_is_jit()) { - mlir::concretelang::dfr::_dfr_jit_workfunction_registration_barrier->wait(); - } + mlir::concretelang::dfr::_dfr_is_jit()) + mlir::concretelang::dfr::_dfr_startup_barrier->wait(); } // This function cannot be used to terminate the runtime as it is @@ -1213,13 +1176,9 @@ void _dfr_start() { // called on exit from "main" when not using the main wrapper library. void _dfr_stop() { // Non-root nodes synchronize here with the root to mark the point - // where the root is free to send work out. - // TODO: optimize this by moving synchro to local remote nodes - // waiting in the scheduler for registration. - if (!mlir::concretelang::dfr:: - _dfr_is_root_node() /*&& _dfr_is_jit() /** implicitly true*/) { - mlir::concretelang::dfr::_dfr_jit_workfunction_registration_barrier->wait(); - } + // where the root is free to send work out (only needed in JIT). + if (!mlir::concretelang::dfr::_dfr_is_root_node()) + mlir::concretelang::dfr::_dfr_startup_barrier->wait(); // The barrier is only needed to synchronize the different // computation phases when the compute nodes need to generate and @@ -1234,10 +1193,6 @@ void _dfr_stop() { mlir::concretelang::dfr::_dfr_jit_phase_barrier->wait(); } - // TODO: until we have better unique identifiers for keys it is - // safer to drop them in-between phases. - mlir::concretelang::dfr::_dfr_node_level_bsk_manager->clear_keys(); - mlir::concretelang::dfr::_dfr_node_level_ksk_manager->clear_keys(); mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager ->clearContext(); }