fix(dfr): broadcast evaluation keys early to avoid locking in HPX helper threads.

This commit is contained in:
Antoniu Pop
2022-07-22 15:49:54 +01:00
committed by Antoniu Pop
parent fbca52f4a0
commit 1bb3d04059
5 changed files with 377 additions and 551 deletions

View File

@@ -69,21 +69,19 @@ struct OpaqueInputData {
std::vector<size_t> _param_sizes,
std::vector<uint64_t> _param_types,
std::vector<size_t> _output_sizes,
std::vector<uint64_t> _output_types, bool _alloc_p = false)
std::vector<uint64_t> _output_types)
: wfn_name(_wfn_name), params(std::move(_params)),
param_sizes(std::move(_param_sizes)),
param_types(std::move(_param_types)),
output_sizes(std::move(_output_sizes)),
output_types(std::move(_output_types)), alloc_p(_alloc_p),
source_locality(hpx::find_here()), ksk_id(0), bsk_id(0) {}
output_types(std::move(_output_types)), ksk_id(0), bsk_id(0) {}
OpaqueInputData(const OpaqueInputData &oid)
: wfn_name(std::move(oid.wfn_name)), params(std::move(oid.params)),
param_sizes(std::move(oid.param_sizes)),
param_types(std::move(oid.param_types)),
output_sizes(std::move(oid.output_sizes)),
output_types(std::move(oid.output_types)), alloc_p(oid.alloc_p),
source_locality(oid.source_locality), ksk_id(oid.ksk_id),
output_types(std::move(oid.output_types)), ksk_id(oid.ksk_id),
bsk_id(oid.bsk_id) {}
friend class hpx::serialization::access;
@@ -91,7 +89,6 @@ struct OpaqueInputData {
ar >> wfn_name;
ar >> param_sizes >> param_types;
ar >> output_sizes >> output_types;
ar >> source_locality;
for (size_t p = 0; p < param_sizes.size(); ++p) {
char *param;
_dfr_checked_aligned_alloc((void **)&param, 64, param_sizes[p]);
@@ -118,15 +115,13 @@ struct OpaqueInputData {
static_cast<StridedMemRefType<char, 1> *>(params[p])->data = data;
} break;
case _DFR_TASK_ARG_CONTEXT: {
ar >> bsk_id >> ksk_id;
// The copied pointer is meaningless - TODO: if the context
// can change dynamically (e.g., different evaluation keys)
// then this needs updating by passing key ids and retrieving
// adequate keys for the context.
delete ((char *)params[p]);
// TODO: this might be relaxed with newer versions of HPX.
// Do not set the context here as remote operations are
// unstable when initiated within a HPX helper thread.
params[p] =
(void *)
_dfr_node_level_runtime_context_manager->getContextAddress();
(void *)_dfr_node_level_runtime_context_manager->getContext();
} break;
case _DFR_TASK_ARG_UNRANKED_MEMREF:
default:
@@ -134,14 +129,12 @@ struct OpaqueInputData {
"Error: invalid task argument type.");
}
}
alloc_p = true;
}
template <class Archive>
void save(Archive &ar, const unsigned int version) const {
ar << wfn_name;
ar << param_sizes << param_types;
ar << output_sizes << output_types;
ar << source_locality;
for (size_t p = 0; p < params.size(); ++p) {
// Save the first level of the data structure - if the parameter
// is a tensor/memref, there is a second level.
@@ -161,18 +154,8 @@ struct OpaqueInputData {
mref.data + mref.offset * elementSize, size * elementSize);
} break;
case _DFR_TASK_ARG_CONTEXT: {
mlir::concretelang::RuntimeContext *context =
*static_cast<mlir::concretelang::RuntimeContext **>(params[p]);
LweKeyswitchKey_u64 *ksk = get_keyswitch_key_u64(context);
LweBootstrapKey_u64 *bsk = get_bootstrap_key_u64(context);
assert(bsk != nullptr && ksk != nullptr && "Missing context keys");
std::cout << "Registering Key ids " << (uint64_t)ksk << " "
<< (uint64_t)bsk << "\n"
<< std::flush;
_dfr_register_bsk(bsk, (uint64_t)bsk);
_dfr_register_ksk(ksk, (uint64_t)ksk);
ar << (uint64_t)bsk << (uint64_t)ksk;
// Nothing to do now - TODO: pass key ids if these are not
// unique for a computation.
} break;
case _DFR_TASK_ARG_UNRANKED_MEMREF:
default:
@@ -189,8 +172,6 @@ struct OpaqueInputData {
std::vector<uint64_t> param_types;
std::vector<size_t> output_sizes;
std::vector<uint64_t> output_types;
bool alloc_p = false;
hpx::naming::id_type source_locality;
uint64_t ksk_id;
uint64_t bsk_id;
};
@@ -199,13 +180,13 @@ struct OpaqueOutputData {
OpaqueOutputData() = default;
OpaqueOutputData(std::vector<void *> outputs,
std::vector<size_t> output_sizes,
std::vector<uint64_t> output_types, bool alloc_p = false)
std::vector<uint64_t> output_types)
: outputs(std::move(outputs)), output_sizes(std::move(output_sizes)),
output_types(std::move(output_types)), alloc_p(alloc_p) {}
output_types(std::move(output_types)) {}
OpaqueOutputData(const OpaqueOutputData &ood)
: outputs(std::move(ood.outputs)),
output_sizes(std::move(ood.output_sizes)),
output_types(std::move(ood.output_types)), alloc_p(ood.alloc_p) {}
output_types(std::move(ood.output_types)) {}
friend class hpx::serialization::access;
template <class Archive> void load(Archive &ar, const unsigned int version) {
@@ -246,7 +227,6 @@ struct OpaqueOutputData {
"Error: invalid task argument type.");
}
}
alloc_p = true;
}
template <class Archive>
void save(Archive &ar, const unsigned int version) const {
@@ -283,7 +263,6 @@ struct OpaqueOutputData {
std::vector<void *> outputs;
std::vector<size_t> output_sizes;
std::vector<uint64_t> output_types;
bool alloc_p = false;
};
struct GenericComputeServer : component_base<GenericComputeServer> {
@@ -295,12 +274,6 @@ struct GenericComputeServer : component_base<GenericComputeServer> {
inputs.wfn_name);
std::vector<void *> outputs;
if (inputs.source_locality != hpx::find_here() &&
(inputs.ksk_id || inputs.bsk_id)) {
_dfr_node_level_runtime_context_manager->getContext(
inputs.ksk_id, inputs.bsk_id, inputs.source_locality);
}
_dfr_debug_print_task(inputs.wfn_name.c_str(), inputs.params.size(),
inputs.output_sizes.size());
hpx::cout << std::flush;
@@ -735,7 +708,7 @@ struct GenericComputeServer : component_base<GenericComputeServer> {
}
return OpaqueOutputData(std::move(outputs), std::move(inputs.output_sizes),
std::move(inputs.output_types), inputs.alloc_p);
std::move(inputs.output_types));
}
HPX_DEFINE_COMPONENT_ACTION(GenericComputeServer, execute_task);

View File

@@ -29,14 +29,9 @@ template <typename T> struct KeyManager;
struct RuntimeContextManager;
namespace {
static void *dl_handle;
static KeyManager<LweBootstrapKey_u64> *_dfr_node_level_bsk_manager;
static KeyManager<LweKeyswitchKey_u64> *_dfr_node_level_ksk_manager;
static RuntimeContextManager *_dfr_node_level_runtime_context_manager;
} // namespace
void _dfr_register_bsk(LweBootstrapKey_u64 *key, uint64_t key_id);
void _dfr_register_ksk(LweKeyswitchKey_u64 *key, uint64_t key_id);
template <typename LweKeyType> struct KeyWrapper {
LweKeyType *key;
@@ -44,6 +39,10 @@ template <typename LweKeyType> struct KeyWrapper {
KeyWrapper(LweKeyType *key) : key(key) {}
KeyWrapper(KeyWrapper &&moved) noexcept : key(moved.key) {}
KeyWrapper(const KeyWrapper &kw) : key(kw.key) {}
KeyWrapper &operator=(const KeyWrapper &rhs) {
this->key = rhs.key;
return *this;
}
friend class hpx::serialization::access;
template <class Archive>
void save(Archive &ar, const unsigned int version) const;
@@ -51,6 +50,12 @@ template <typename LweKeyType> struct KeyWrapper {
HPX_SERIALIZATION_SPLIT_MEMBER()
};
template <typename LweKeyType>
bool operator==(const KeyWrapper<LweKeyType> &lhs,
const KeyWrapper<LweKeyType> &rhs) {
return lhs.key == rhs.key;
}
template <>
template <class Archive>
void KeyWrapper<LweBootstrapKey_u64>::save(Archive &ar,
@@ -91,137 +96,6 @@ void KeyWrapper<LweKeyswitchKey_u64>::load(Archive &ar,
key = deserialize_lwe_keyswitching_key_u64(buffer);
}
template <typename LweKeyType> struct KeyManager {
KeyManager() {}
LweKeyType *get_key(hpx::naming::id_type loc, const uint64_t key_id);
KeyWrapper<LweKeyType> fetch_key(const uint64_t key_id) {
std::lock_guard<std::mutex> guard(keystore_guard);
auto keyit = keystore.find(key_id);
if (keyit != keystore.end())
return keyit->second;
// If this node does not contain this key, this is an error
// (location was supplied as source for this key).
HPX_THROW_EXCEPTION(
hpx::no_success, "fetch_key",
"Error: could not find key to be fetched on source location.");
}
void register_key(LweKeyType *key, uint64_t key_id) {
std::lock_guard<std::mutex> guard(keystore_guard);
auto keyit = keystore.find(key_id);
if (keyit == keystore.end()) {
keyit = keystore
.insert(std::pair<uint64_t, KeyWrapper<LweKeyType>>(
key_id, KeyWrapper<LweKeyType>(key)))
.first;
if (keyit == keystore.end()) {
HPX_THROW_EXCEPTION(hpx::no_success, "_dfr_register_key",
"Error: could not register new key.");
}
}
}
void clear_keys() {
std::lock_guard<std::mutex> guard(keystore_guard);
keystore.clear();
}
private:
std::mutex keystore_guard;
std::map<uint64_t, KeyWrapper<LweKeyType>> keystore;
};
KeyWrapper<LweBootstrapKey_u64> _dfr_fetch_bsk(uint64_t key_id) {
return _dfr_node_level_bsk_manager->fetch_key(key_id);
}
KeyWrapper<LweKeyswitchKey_u64> _dfr_fetch_ksk(uint64_t key_id) {
return _dfr_node_level_ksk_manager->fetch_key(key_id);
}
} // namespace dfr
} // namespace concretelang
} // namespace mlir
HPX_PLAIN_ACTION(mlir::concretelang::dfr::_dfr_fetch_ksk, _dfr_fetch_ksk_action)
HPX_PLAIN_ACTION(mlir::concretelang::dfr::_dfr_fetch_bsk, _dfr_fetch_bsk_action)
namespace mlir {
namespace concretelang {
namespace dfr {
template <> KeyManager<LweBootstrapKey_u64>::KeyManager() {
_dfr_node_level_bsk_manager = this;
}
template <>
LweBootstrapKey_u64 *
KeyManager<LweBootstrapKey_u64>::get_key(hpx::naming::id_type loc,
const uint64_t key_id) {
keystore_guard.lock();
auto keyit = keystore.find(key_id);
keystore_guard.unlock();
if (keyit == keystore.end()) {
_dfr_fetch_bsk_action fetch;
KeyWrapper<LweBootstrapKey_u64> &&bskw = fetch(loc, key_id);
if (bskw.key == nullptr) {
HPX_THROW_EXCEPTION(hpx::no_success, "_dfr_get_key",
"Error: Bootstrap key not found on root node.");
} else {
_dfr_register_bsk(bskw.key, key_id);
}
return bskw.key;
}
return keyit->second.key;
}
template <> KeyManager<LweKeyswitchKey_u64>::KeyManager() {
_dfr_node_level_ksk_manager = this;
}
template <>
LweKeyswitchKey_u64 *
KeyManager<LweKeyswitchKey_u64>::get_key(hpx::naming::id_type loc,
const uint64_t key_id) {
keystore_guard.lock();
auto keyit = keystore.find(key_id);
keystore_guard.unlock();
if (keyit == keystore.end()) {
_dfr_fetch_ksk_action fetch;
KeyWrapper<LweKeyswitchKey_u64> &&kskw = fetch(loc, key_id);
if (kskw.key == nullptr) {
HPX_THROW_EXCEPTION(hpx::no_success, "_dfr_get_key",
"Error: Keyswitching key not found on root node.");
} else {
_dfr_register_ksk(kskw.key, key_id);
}
return kskw.key;
}
return keyit->second.key;
}
/************************/
/* Key management API. */
/************************/
void _dfr_register_bsk(LweBootstrapKey_u64 *key, uint64_t key_id) {
_dfr_node_level_bsk_manager->register_key(key, key_id);
}
void _dfr_register_ksk(LweKeyswitchKey_u64 *key, uint64_t key_id) {
_dfr_node_level_ksk_manager->register_key(key, key_id);
}
LweBootstrapKey_u64 *_dfr_get_bsk(hpx::naming::id_type loc, uint64_t key_id) {
return _dfr_node_level_bsk_manager->get_key(loc, key_id);
}
LweKeyswitchKey_u64 *_dfr_get_ksk(hpx::naming::id_type loc, uint64_t key_id) {
return _dfr_node_level_ksk_manager->get_key(loc, key_id);
}
/************************/
/* Context management. */
/************************/
@@ -230,58 +104,52 @@ struct RuntimeContextManager {
// TODO: this is only ok so long as we don't change keys. Once we
// use multiple keys, should have a map.
RuntimeContext *context;
std::mutex context_guard;
uint64_t ksk_id;
uint64_t bsk_id;
RuntimeContextManager() {
ksk_id = 0;
bsk_id = 0;
context = nullptr;
_dfr_node_level_runtime_context_manager = this;
}
RuntimeContext *getContext(uint64_t ksk, uint64_t bsk,
hpx::naming::id_type source_locality) {
std::cout << "GetContext on node " << hpx::get_locality_id()
<< " with context " << context << " " << bsk_id << " " << ksk_id
<< "\n"
<< std::flush;
if (context != nullptr) {
std::cout << "simil " << ksk_id << " " << ksk << " " << bsk_id << " "
<< bsk << "\n"
<< std::flush;
assert(ksk == ksk_id && bsk == bsk_id &&
"Context manager can only used with single keys for now.");
void setContext(void *ctx) {
assert(context == nullptr &&
"Only one RuntimeContext can be used at a time.");
// Root node broadcasts the evaluation keys and each remote
// instantiates a local RuntimeContext.
if (_dfr_is_root_node()) {
RuntimeContext *context = (RuntimeContext *)ctx;
LweKeyswitchKey_u64 *ksk = get_keyswitch_key_u64(context);
LweBootstrapKey_u64 *bsk = get_bootstrap_key_u64(context);
auto kskFut = hpx::collectives::broadcast_to(
"ksk_keystore", KeyWrapper<LweKeyswitchKey_u64>(ksk));
auto bskFut = hpx::collectives::broadcast_to(
"bsk_keystore", KeyWrapper<LweBootstrapKey_u64>(bsk));
} else {
assert(ksk_id == 0 && bsk_id == 0 &&
"Context empty but context manager has key ids.");
LweKeyswitchKey_u64 *keySwitchKey = _dfr_get_ksk(source_locality, ksk);
LweBootstrapKey_u64 *bootstrapKey = _dfr_get_bsk(source_locality, bsk);
std::lock_guard<std::mutex> guard(context_guard);
if (context == nullptr) {
auto ctx = new RuntimeContext();
ctx->evaluationKeys = ::concretelang::clientlib::EvaluationKeys(
std::shared_ptr<::concretelang::clientlib::LweKeyswitchKey>(
new ::concretelang::clientlib::LweKeyswitchKey(keySwitchKey)),
std::shared_ptr<::concretelang::clientlib::LweBootstrapKey>(
new ::concretelang::clientlib::LweBootstrapKey(bootstrapKey)));
ksk_id = ksk;
bsk_id = bsk;
context = ctx;
std::cout << "Fetching Key ids " << ksk_id << " " << bsk_id << "\n"
<< std::flush;
} else {
std::cout << " GOT context after LOCK on node "
<< hpx::get_locality_id() << " with context " << context
<< " " << bsk_id << " " << ksk_id << "\n"
<< std::flush;
}
auto kskFut =
hpx::collectives::broadcast_from<KeyWrapper<LweKeyswitchKey_u64>>(
"ksk_keystore");
auto bskFut =
hpx::collectives::broadcast_from<KeyWrapper<LweBootstrapKey_u64>>(
"bsk_keystore");
context = new mlir::concretelang::RuntimeContext();
context->evaluationKeys = ::concretelang::clientlib::EvaluationKeys(
std::shared_ptr<::concretelang::clientlib::LweKeyswitchKey>(
new ::concretelang::clientlib::LweKeyswitchKey(kskFut.get().key)),
std::shared_ptr<::concretelang::clientlib::LweBootstrapKey>(
new ::concretelang::clientlib::LweBootstrapKey(
bskFut.get().key)));
}
return context;
}
RuntimeContext **getContextAddress() { return &context; }
RuntimeContext **getContext() { return &context; }
void clearContext() {
if (context != nullptr)
delete context;
context = nullptr;
}
};
} // namespace dfr

View File

@@ -26,6 +26,7 @@ void _dfr_deallocate_future(void *);
void _dfr_deallocate_future_data(void *);
/* Initialisation & termination. */
void _dfr_start_c(void *);
void _dfr_start();
void _dfr_stop();

View File

@@ -5,6 +5,7 @@
#include <iostream>
#include <concretelang/Conversion/Tools.h>
#include <concretelang/Dialect/Concrete/IR/ConcreteDialect.h>
#include <concretelang/Dialect/Concrete/IR/ConcreteOps.h>
#include <concretelang/Dialect/Concrete/IR/ConcreteTypes.h>
@@ -422,6 +423,17 @@ struct LowerDataflowTasksPass
});
for (auto entryPoint : entryPoints) {
// Check if this entry point uses a context - do this before we
// remove arguments in remote nodes
int ctxIndex = -1;
for (auto arg : llvm::enumerate(entryPoint.getArguments()))
if (arg.value()
.getType()
.isa<mlir::concretelang::Concrete::ContextType>()) {
ctxIndex = arg.index();
break;
}
// If this is a JIT invocation and we're not on the root node,
// we do not need to do any computation, only register all work
// functions with the runtime system
@@ -455,20 +467,37 @@ struct LowerDataflowTasksPass
if (!workFunctions.empty()) {
OpBuilder builder(entryPoint.getBody());
builder.setInsertionPointToStart(&entryPoint.getBody().front());
auto dfrStartFunOp = mlir::LLVM::lookupOrCreateFn(
module, "_dfr_start", {},
LLVM::LLVMVoidType::get(entryPoint->getContext()));
builder.create<LLVM::CallOp>(entryPoint.getLoc(), dfrStartFunOp,
mlir::ValueRange(),
ArrayRef<NamedAttribute>());
if (ctxIndex >= 0) {
auto startFunTy =
(dfr::_dfr_is_root_node())
? mlir::FunctionType::get(
entryPoint->getContext(),
{entryPoint.getArgument(ctxIndex).getType()}, {})
: mlir::FunctionType::get(entryPoint->getContext(), {}, {});
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_start_c",
startFunTy);
builder.create<mlir::func::CallOp>(
entryPoint.getLoc(), "_dfr_start_c", mlir::TypeRange(),
(dfr::_dfr_is_root_node()) ? entryPoint.getArgument(ctxIndex)
: mlir::ValueRange());
} else {
auto startFunTy =
mlir::FunctionType::get(entryPoint->getContext(), {}, {});
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_start",
startFunTy);
builder.create<mlir::func::CallOp>(entryPoint.getLoc(), "_dfr_start",
mlir::TypeRange(),
mlir::ValueRange());
}
builder.setInsertionPoint(entryPoint.getBody().back().getTerminator());
auto dfrStopFunOp = mlir::LLVM::lookupOrCreateFn(
module, "_dfr_stop", {},
LLVM::LLVMVoidType::get(entryPoint->getContext()));
builder.create<LLVM::CallOp>(entryPoint.getLoc(), dfrStopFunOp,
mlir::ValueRange(),
ArrayRef<NamedAttribute>());
auto stopFunTy =
mlir::FunctionType::get(entryPoint->getContext(), {}, {});
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_stop",
stopFunTy);
builder.create<mlir::func::CallOp>(entryPoint.getLoc(), "_dfr_stop",
mlir::TypeRange(),
mlir::ValueRange());
}
}
}

View File

@@ -28,8 +28,8 @@ namespace concretelang {
namespace dfr {
namespace {
static std::vector<GenericComputeClient> gcc;
static hpx::lcos::barrier *_dfr_jit_workfunction_registration_barrier;
static hpx::lcos::barrier *_dfr_jit_phase_barrier;
static hpx::lcos::barrier *_dfr_startup_barrier;
static size_t num_nodes = 0;
} // namespace
} // namespace dfr
@@ -93,7 +93,6 @@ static inline size_t _dfr_find_next_execution_locality() {
/// the returns.
void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
...) {
// std::vector<void *> params;
std::vector<void *> refcounted_futures;
std::vector<size_t> param_sizes;
std::vector<uint64_t> param_types;
@@ -133,50 +132,47 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
// satisfied, which generates a future on a tuple of outputs, which
// is then further split into a tuple of futures and provide
// individual synchronization for each return independently.
mlir::concretelang::dfr::GenericComputeClient *gcc_target =
&mlir::concretelang::dfr::gcc[_dfr_find_next_execution_locality()];
switch (num_params) {
case 0:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes, output_types]()
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target]()
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {};
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
}));
break;
case 1:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {param0.get()};
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future));
break;
case 2:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {param0.get(), param1.get()};
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future));
@@ -184,19 +180,17 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 3:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {param0.get(), param1.get(),
param2.get()};
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -205,20 +199,18 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 4:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {param0.get(), param1.get(),
param2.get(), param3.get()};
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -228,12 +220,12 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 5:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {param0.get(), param1.get(),
param2.get(), param3.get(),
@@ -241,9 +233,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -254,13 +244,13 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 6:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {param0.get(), param1.get(),
param2.get(), param3.get(),
@@ -268,9 +258,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -282,14 +270,14 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 7:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -297,9 +285,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -312,15 +298,15 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 8:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -328,9 +314,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -344,16 +328,16 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 9:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(),
@@ -362,9 +346,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -379,17 +361,17 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 10:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -398,9 +380,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -416,18 +396,18 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 11:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -436,9 +416,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -455,19 +433,19 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 12:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -476,9 +454,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -496,20 +472,20 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 13:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -519,9 +495,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -540,21 +514,21 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 14:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -564,9 +538,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -586,22 +558,22 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 15:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13,
hpx::shared_future<void *> param14)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13,
hpx::shared_future<void *> param14)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -611,9 +583,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -634,23 +604,23 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 16:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13,
hpx::shared_future<void *> param14,
hpx::shared_future<void *> param15)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13,
hpx::shared_future<void *> param14,
hpx::shared_future<void *> param15)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -660,9 +630,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -684,24 +652,24 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 17:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13,
hpx::shared_future<void *> param14,
hpx::shared_future<void *> param15,
hpx::shared_future<void *> param16)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13,
hpx::shared_future<void *> param14,
hpx::shared_future<void *> param15,
hpx::shared_future<void *> param16)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -712,9 +680,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -737,25 +703,25 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 18:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13,
hpx::shared_future<void *> param14,
hpx::shared_future<void *> param15,
hpx::shared_future<void *> param16,
hpx::shared_future<void *> param17)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13,
hpx::shared_future<void *> param14,
hpx::shared_future<void *> param15,
hpx::shared_future<void *> param16,
hpx::shared_future<void *> param17)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -766,9 +732,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -792,26 +756,26 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 19:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13,
hpx::shared_future<void *> param14,
hpx::shared_future<void *> param15,
hpx::shared_future<void *> param16,
hpx::shared_future<void *> param17,
hpx::shared_future<void *> param18)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13,
hpx::shared_future<void *> param14,
hpx::shared_future<void *> param15,
hpx::shared_future<void *> param16,
hpx::shared_future<void *> param17,
hpx::shared_future<void *> param18)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -822,9 +786,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -849,27 +811,27 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
case 20:
oodf = std::move(hpx::dataflow(
[wfnname, param_sizes, param_types, output_sizes,
output_types](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13,
hpx::shared_future<void *> param14,
hpx::shared_future<void *> param15,
hpx::shared_future<void *> param16,
hpx::shared_future<void *> param17,
hpx::shared_future<void *> param18,
hpx::shared_future<void *> param19)
[wfnname, param_sizes, param_types, output_sizes, output_types,
gcc_target](hpx::shared_future<void *> param0,
hpx::shared_future<void *> param1,
hpx::shared_future<void *> param2,
hpx::shared_future<void *> param3,
hpx::shared_future<void *> param4,
hpx::shared_future<void *> param5,
hpx::shared_future<void *> param6,
hpx::shared_future<void *> param7,
hpx::shared_future<void *> param8,
hpx::shared_future<void *> param9,
hpx::shared_future<void *> param10,
hpx::shared_future<void *> param11,
hpx::shared_future<void *> param12,
hpx::shared_future<void *> param13,
hpx::shared_future<void *> param14,
hpx::shared_future<void *> param15,
hpx::shared_future<void *> param16,
hpx::shared_future<void *> param17,
hpx::shared_future<void *> param18,
hpx::shared_future<void *> param19)
-> hpx::future<mlir::concretelang::dfr::OpaqueOutputData> {
std::vector<void *> params = {
param0.get(), param1.get(), param2.get(), param3.get(),
@@ -880,9 +842,7 @@ void _dfr_create_async_task(wfnptr wfn, size_t num_params, size_t num_outputs,
mlir::concretelang::dfr::OpaqueInputData oid(
wfnname, params, param_sizes, param_types, output_sizes,
output_types);
return mlir::concretelang::dfr::gcc
[_dfr_find_next_execution_locality()]
.execute_task(oid);
return gcc_target->execute_task(oid);
},
*((dfr_refcounted_future_p)refcounted_futures[0])->future,
*((dfr_refcounted_future_p)refcounted_futures[1])->future,
@@ -1146,17 +1106,13 @@ static inline void _dfr_start_impl(int argc, char *argv[]) {
(hpx::find_here() == hpx::find_root_locality());
mlir::concretelang::dfr::num_nodes = hpx::get_num_localities().get();
new mlir::concretelang::dfr::KeyManager<LweBootstrapKey_u64>();
new mlir::concretelang::dfr::KeyManager<LweKeyswitchKey_u64>();
new mlir::concretelang::dfr::RuntimeContextManager();
new mlir::concretelang::dfr::WorkFunctionRegistry();
mlir::concretelang::dfr::_dfr_jit_workfunction_registration_barrier =
new hpx::lcos::barrier("wait_register_remote_work_functions",
mlir::concretelang::dfr::num_nodes,
hpx::get_locality_id());
mlir::concretelang::dfr::_dfr_jit_phase_barrier = new hpx::lcos::barrier(
"phase_barrier", mlir::concretelang::dfr::num_nodes,
hpx::get_locality_id());
mlir::concretelang::dfr::_dfr_startup_barrier = new hpx::lcos::barrier(
"startup_barrier", mlir::concretelang::dfr::num_nodes,
hpx::get_locality_id());
if (mlir::concretelang::dfr::_dfr_is_root_node()) {
// Create compute server components on each node - from the root
@@ -1197,14 +1153,21 @@ void _dfr_start() {
if (!mlir::concretelang::dfr::_dfr_is_root_node() &&
!mlir::concretelang::dfr::_dfr_is_jit())
_dfr_stop_impl();
}
// TODO: conditional -- If this is the root node, and this is JIT
// execution, we need to wait for the compute nodes to compile and
// register work functions
// Startup entry point when a RuntimeContext is used
void _dfr_start_c(void *ctx) {
_dfr_start();
new mlir::concretelang::dfr::RuntimeContextManager();
mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager->setContext(
ctx);
// If this is not JIT, then the remote nodes never reach _dfr_stop,
// so root should not instantiate this barrier.
if (mlir::concretelang::dfr::_dfr_is_root_node() &&
mlir::concretelang::dfr::_dfr_is_jit()) {
mlir::concretelang::dfr::_dfr_jit_workfunction_registration_barrier->wait();
}
mlir::concretelang::dfr::_dfr_is_jit())
mlir::concretelang::dfr::_dfr_startup_barrier->wait();
}
// This function cannot be used to terminate the runtime as it is
@@ -1213,13 +1176,9 @@ void _dfr_start() {
// called on exit from "main" when not using the main wrapper library.
void _dfr_stop() {
// Non-root nodes synchronize here with the root to mark the point
// where the root is free to send work out.
// TODO: optimize this by moving synchro to local remote nodes
// waiting in the scheduler for registration.
if (!mlir::concretelang::dfr::
_dfr_is_root_node() /*&& _dfr_is_jit() /** implicitly true*/) {
mlir::concretelang::dfr::_dfr_jit_workfunction_registration_barrier->wait();
}
// where the root is free to send work out (only needed in JIT).
if (!mlir::concretelang::dfr::_dfr_is_root_node())
mlir::concretelang::dfr::_dfr_startup_barrier->wait();
// The barrier is only needed to synchronize the different
// computation phases when the compute nodes need to generate and
@@ -1234,10 +1193,6 @@ void _dfr_stop() {
mlir::concretelang::dfr::_dfr_jit_phase_barrier->wait();
}
// TODO: until we have better unique identifiers for keys it is
// safer to drop them in-between phases.
mlir::concretelang::dfr::_dfr_node_level_bsk_manager->clear_keys();
mlir::concretelang::dfr::_dfr_node_level_ksk_manager->clear_keys();
mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager
->clearContext();
}