feat(dfr): add timing measurements.

This commit is contained in:
Antoniu Pop
2022-07-29 08:57:51 +01:00
committed by Antoniu Pop
parent dd2b2b9ce9
commit ba60d74da0
6 changed files with 222 additions and 84 deletions

View File

@@ -76,6 +76,7 @@ endif()
#-------------------------------------------------------------------------------
option(CONCRETELANG_PARALLEL_EXECUTION_ENABLED "Enables parallel execution for ConcreteLang." ON)
option(CONCRETELANG_TIMING_ENABLED "Enables execution timing." ON)
if(CONCRETELANG_PARALLEL_EXECUTION_ENABLED)
message(STATUS "ConcreteLang parallel execution enabled.")
@@ -92,6 +93,14 @@ else()
message(STATUS "ConcreteLang parallel execution disabled.")
endif()
if(CONCRETELANG_TIMING_ENABLED)
add_compile_options(
-DCONCRETELANG_TIMING_ENABLED
)
else()
message(STATUS "ConcreteLang execution timing disabled.")
endif()
#-------------------------------------------------------------------------------
# Unit tests
#-------------------------------------------------------------------------------

View File

@@ -2,6 +2,7 @@ BUILD_DIR=./build
Python3_EXECUTABLE?=
BINDINGS_PYTHON_ENABLED=ON
PARALLEL_EXECUTION_ENABLED=OFF
TIMING_ENABLED=OFF
CC_COMPILER=
CXX_COMPILER=
@@ -58,6 +59,7 @@ $(BUILD_DIR)/configured.stamp:
-DMLIR_ENABLE_BINDINGS_PYTHON=$(BINDINGS_PYTHON_ENABLED) \
-DCONCRETELANG_BINDINGS_PYTHON_ENABLED=$(BINDINGS_PYTHON_ENABLED) \
-DCONCRETELANG_PARALLEL_EXECUTION_ENABLED=$(PARALLEL_EXECUTION_ENABLED) \
-DCONCRETELANG_TIMING_ENABLED=$(TIMING_ENABLED) \
-DCONCRETE_FFI_RELEASE=${CONCRETE_PROJECT}/target/release \
-DHPX_DIR=${HPX_INSTALL_DIR}/lib/cmake/HPX \
-DLLVM_EXTERNAL_PROJECTS=concretelang \

View File

@@ -27,8 +27,8 @@ void _dfr_deallocate_future_data(void *);
/* Initialisation & termination. */
void _dfr_start_c(void *);
void _dfr_start();
void _dfr_stop();
void _dfr_start(int);
void _dfr_stop(int);
void _dfr_terminate();
}

View File

@@ -0,0 +1,95 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
// for license information.
#ifndef CONCRETELANG_DFR_TIME_UTIL_H
#define CONCRETELANG_DFR_TIME_UTIL_H
#if CONCRETELANG_TIMING_ENABLED
#include <assert.h>
#include <iostream>
#include <time.h>
#include "concretelang/Runtime/DFRuntime.hpp"
// Clock used for every wall-clock interval measured by BEGIN_TIME/END_TIME.
#define TIME_UTIL_CLOCK CLOCK_MONOTONIC

static inline int timespec_diff(struct timespec *, const struct timespec *,
                                const struct timespec *);

// BEGIN_TIME(p): store the current TIME_UTIL_CLOCK reading in the
// `struct timespec` pointed to by `p`.
//
// The clock is read outside of assert() on purpose: when NDEBUG is defined
// the assert expression is not evaluated at all, which would otherwise
// leave the timer unset in release builds.
#define BEGIN_TIME(p)                                                          \
  do {                                                                         \
    const int _begin_time_rc = clock_gettime(TIME_UTIL_CLOCK, (p));            \
    assert(_begin_time_rc == 0 && "clock_gettime failed");                     \
    (void)_begin_time_rc;                                                      \
  } while (0)

// Start of the report line printed by END_TIME. When parallel execution is
// enabled the line is prefixed with the id returned by
// _dfr_debug_get_node_id().
#if CONCRETELANG_PARALLEL_EXECUTION_ENABLED
#define TIME_UTIL_REPORT_PREFIX()                                              \
  (std::cout << "[NODE \t" << _dfr_debug_get_node_id() << "] \t")
#else
#define TIME_UTIL_REPORT_PREFIX() (std::cout)
#endif

// END_TIME(p, m): compute the time elapsed since the matching BEGIN_TIME(p),
// store it back into `*p` and print it on std::cout labelled with `m`.
//
// - `p` is evaluated exactly once.
// - The clock read and the subtraction are performed outside of assert() so
//   that they still happen when NDEBUG is defined.
// - The nanosecond field is zero-padded to 9 digits so the printed value is
//   a correct decimal fraction of a second (1 s + 5 ms prints 1.005000000,
//   not 1.5000000). The stream's fill character is restored afterwards.
#define END_TIME(p, m)                                                         \
  do {                                                                         \
    struct timespec *const _end_time_p = (p);                                  \
    struct timespec _end_time_tv;                                              \
    const int _end_time_rc = clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv);    \
    assert(_end_time_rc == 0 && "clock_gettime failed");                       \
    (void)_end_time_rc;                                                        \
    const int _end_time_neg =                                                  \
        timespec_diff(_end_time_p, &_end_time_tv, _end_time_p);                \
    assert(_end_time_neg == 0 && "negative elapsed time");                     \
    (void)_end_time_neg;                                                       \
    TIME_UTIL_REPORT_PREFIX()                                                  \
        << (m) << " time : \t" << _end_time_p->tv_sec << ".";                  \
    const char _end_time_fill = std::cout.fill('0');                           \
    std::cout.width(9);                                                        \
    std::cout << _end_time_p->tv_nsec;                                         \
    std::cout.fill(_end_time_fill);                                            \
    std::cout << " seconds.\n" << std::flush;                                  \
  } while (0)
// Returns the CPU time consumed so far by the calling thread, in seconds,
// as read from CLOCK_THREAD_CPUTIME_ID.
//
// The clock is read outside of assert(): with NDEBUG defined the assert
// expression is not evaluated, which previously meant the clock was never
// read and an uninitialised timespec was converted. The timespec is also
// zero-initialised so a failed read in a release build yields 0.0.
static inline double get_thread_cpu_time(void) {
  struct timespec _tv = {};
  const int _rc = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &_tv);
  assert(_rc == 0 && "clock_gettime(CLOCK_THREAD_CPUTIME_ID) failed");
  (void)_rc;
  double _t = _tv.tv_sec;
  _t += _tv.tv_nsec * 1e-9;
  return _t;
}
// Computes *_px - *_py and stores the difference in *_result.
//
// Both operands are copied before *_result is written, so _result may point
// to the same object as _px or _py (END_TIME relies on this).
//
// Returns 1 when the difference is negative, 0 otherwise.
static inline int timespec_diff(struct timespec *_result,
                                const struct timespec *_px,
                                const struct timespec *_py) {
  const long nsec_per_sec = 1000000000L;
  struct timespec minuend = *_px;
  struct timespec subtrahend = *_py;

  // Borrow whole seconds into the subtrahend so that the nanosecond
  // subtraction below cannot go negative.
  if (minuend.tv_nsec < subtrahend.tv_nsec) {
    const long borrow =
        (subtrahend.tv_nsec - minuend.tv_nsec) / nsec_per_sec + 1;
    subtrahend.tv_nsec -= nsec_per_sec * borrow;
    subtrahend.tv_sec += borrow;
  }

  // Conversely, move any surplus of more than one second from the
  // nanosecond gap back into the seconds field.
  if (minuend.tv_nsec - subtrahend.tv_nsec > nsec_per_sec) {
    const long surplus =
        (minuend.tv_nsec - subtrahend.tv_nsec) / nsec_per_sec;
    subtrahend.tv_nsec += nsec_per_sec * surplus;
    subtrahend.tv_sec -= surplus;
  }

  _result->tv_sec = minuend.tv_sec - subtrahend.tv_sec;
  _result->tv_nsec = minuend.tv_nsec - subtrahend.tv_nsec;

  return (minuend.tv_sec < subtrahend.tv_sec) ? 1 : 0;
}
#else // CONCRETELANG_TIMING_ENABLED
#define BEGIN_TIME(p)
#define END_TIME(p, m)
#endif // CONCRETELANG_TIMING_ENABLED
#endif

View File

@@ -464,41 +464,40 @@ struct LowerDataflowTasksPass
registerWorkFunction(entryPoint, wf);
// Issue _dfr_start/stop calls for this function
if (!workFunctions.empty()) {
OpBuilder builder(entryPoint.getBody());
builder.setInsertionPointToStart(&entryPoint.getBody().front());
OpBuilder builder(entryPoint.getBody());
builder.setInsertionPointToStart(&entryPoint.getBody().front());
int useDFR = (workFunctions.empty()) ? 0 : 1;
Value useDFRVal = builder.create<arith::ConstantOp>(
entryPoint.getLoc(), builder.getI64IntegerAttr(useDFR));
if (ctxIndex >= 0) {
auto startFunTy =
(dfr::_dfr_is_root_node())
? mlir::FunctionType::get(
entryPoint->getContext(),
{entryPoint.getArgument(ctxIndex).getType()}, {})
: mlir::FunctionType::get(entryPoint->getContext(), {}, {});
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_start_c",
startFunTy);
builder.create<mlir::func::CallOp>(
entryPoint.getLoc(), "_dfr_start_c", mlir::TypeRange(),
(dfr::_dfr_is_root_node()) ? entryPoint.getArgument(ctxIndex)
: mlir::ValueRange());
} else {
auto startFunTy =
mlir::FunctionType::get(entryPoint->getContext(), {}, {});
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_start",
startFunTy);
builder.create<mlir::func::CallOp>(entryPoint.getLoc(), "_dfr_start",
mlir::TypeRange(),
mlir::ValueRange());
}
builder.setInsertionPoint(entryPoint.getBody().back().getTerminator());
auto stopFunTy =
mlir::FunctionType::get(entryPoint->getContext(), {}, {});
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_stop",
stopFunTy);
builder.create<mlir::func::CallOp>(entryPoint.getLoc(), "_dfr_stop",
mlir::TypeRange(),
mlir::ValueRange());
if (ctxIndex >= 0) {
auto startFunTy =
(dfr::_dfr_is_root_node())
? mlir::FunctionType::get(
entryPoint->getContext(),
{entryPoint.getArgument(ctxIndex).getType()}, {})
: mlir::FunctionType::get(entryPoint->getContext(), {}, {});
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_start_c",
startFunTy);
builder.create<mlir::func::CallOp>(
entryPoint.getLoc(), "_dfr_start_c", mlir::TypeRange(),
(dfr::_dfr_is_root_node()) ? entryPoint.getArgument(ctxIndex)
: mlir::ValueRange());
} else {
auto startFunTy = mlir::FunctionType::get(entryPoint->getContext(),
{useDFRVal.getType()}, {});
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_start",
startFunTy);
builder.create<mlir::func::CallOp>(entryPoint.getLoc(), "_dfr_start",
mlir::TypeRange(), useDFRVal);
}
builder.setInsertionPoint(entryPoint.getBody().back().getTerminator());
auto stopFunTy = mlir::FunctionType::get(entryPoint->getContext(),
{useDFRVal.getType()}, {});
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_stop",
stopFunTy);
builder.create<mlir::func::CallOp>(entryPoint.getLoc(), "_dfr_stop",
mlir::TypeRange(), useDFRVal);
}
}
LowerDataflowTasksPass(bool debug) : debug(debug){};

View File

@@ -22,6 +22,7 @@
#include "concretelang/Runtime/DFRuntime.hpp"
#include "concretelang/Runtime/distributed_generic_task_server.hpp"
#include "concretelang/Runtime/runtime_api.h"
#include "concretelang/Runtime/time_util.h"
namespace mlir {
namespace concretelang {
@@ -31,6 +32,7 @@ static std::vector<GenericComputeClient> gcc;
static hpx::lcos::barrier *_dfr_jit_phase_barrier;
static hpx::lcos::barrier *_dfr_startup_barrier;
static size_t num_nodes = 0;
static struct timespec init_timer, broadcast_timer, compute_timer, whole_timer;
} // namespace
} // namespace dfr
} // namespace concretelang
@@ -1129,72 +1131,92 @@ static inline void _dfr_start_impl(int argc, char *argv[]) {
/* Start/stop functions to be called from within user code (or during
JIT invocation). These serve to pause/resume the runtime
scheduler and to clean up used resources. */
void _dfr_start() {
// The first invocation will initialise the runtime. As each call to
// _dfr_start is matched with _dfr_stop, if this is not hte first,
// we need to resume the HPX runtime.
assert(
mlir::concretelang::dfr::init_guard !=
mlir::concretelang::dfr::terminated &&
"DFR runtime: attempting to start runtime after it has been terminated");
uint64_t expected = mlir::concretelang::dfr::uninitialised;
if (mlir::concretelang::dfr::init_guard.compare_exchange_strong(
expected, mlir::concretelang::dfr::active))
_dfr_start_impl(0, nullptr);
void _dfr_start(int use_dfr_p) {
BEGIN_TIME(&mlir::concretelang::dfr::whole_timer);
if (use_dfr_p) {
BEGIN_TIME(&mlir::concretelang::dfr::init_timer);
// The first invocation will initialise the runtime. As each call to
// _dfr_start is matched with _dfr_stop, if this is not the first,
// we need to resume the HPX runtime.
assert(mlir::concretelang::dfr::init_guard !=
mlir::concretelang::dfr::terminated &&
"DFR runtime: attempting to start runtime after it has been "
"terminated");
uint64_t expected = mlir::concretelang::dfr::uninitialised;
if (mlir::concretelang::dfr::init_guard.compare_exchange_strong(
expected, mlir::concretelang::dfr::active))
_dfr_start_impl(0, nullptr);
END_TIME(&mlir::concretelang::dfr::init_timer, "Initialization");
assert(mlir::concretelang::dfr::init_guard ==
mlir::concretelang::dfr::active &&
"DFR runtime failed to initialise");
assert(mlir::concretelang::dfr::init_guard ==
mlir::concretelang::dfr::active &&
"DFR runtime failed to initialise");
// If this is not the root node in a non-JIT execution, then this
// node should only run the scheduler for any incoming work until
// termination is flagged. If this is JIT, we need to run the
// cancelled function which registers the work functions.
if (!mlir::concretelang::dfr::_dfr_is_root_node() &&
!mlir::concretelang::dfr::_dfr_is_jit())
_dfr_stop_impl();
if (use_dfr_p == 1) {
BEGIN_TIME(&mlir::concretelang::dfr::compute_timer);
}
// If this is not the root node in a non-JIT execution, then this
// node should only run the scheduler for any incoming work until
// termination is flagged. If this is JIT, we need to run the
// cancelled function which registers the work functions.
if (!mlir::concretelang::dfr::_dfr_is_root_node() &&
!mlir::concretelang::dfr::_dfr_is_jit())
_dfr_stop_impl();
}
}
// Startup entry point when a RuntimeContext is used
void _dfr_start_c(void *ctx) {
_dfr_start();
_dfr_start(2);
new mlir::concretelang::dfr::RuntimeContextManager();
mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager->setContext(
ctx);
if (mlir::concretelang::dfr::num_nodes > 1) {
BEGIN_TIME(&mlir::concretelang::dfr::broadcast_timer);
new mlir::concretelang::dfr::RuntimeContextManager();
mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager
->setContext(ctx);
// If this is not JIT, then the remote nodes never reach _dfr_stop,
// so root should not instantiate this barrier.
if (mlir::concretelang::dfr::_dfr_is_root_node() &&
mlir::concretelang::dfr::_dfr_is_jit())
mlir::concretelang::dfr::_dfr_startup_barrier->wait();
// If this is not JIT, then the remote nodes never reach _dfr_stop,
// so root should not instantiate this barrier.
if (mlir::concretelang::dfr::_dfr_is_root_node() &&
mlir::concretelang::dfr::_dfr_is_jit())
mlir::concretelang::dfr::_dfr_startup_barrier->wait();
END_TIME(&mlir::concretelang::dfr::broadcast_timer, "Key broadcasting");
}
BEGIN_TIME(&mlir::concretelang::dfr::compute_timer);
}
// This function cannot be used to terminate the runtime as it is
// non-decidable if another computation phase will follow. Instead the
// _dfr_terminate function provides this facility and is normally
// called on exit from "main" when not using the main wrapper library.
void _dfr_stop() {
// Non-root nodes synchronize here with the root to mark the point
// where the root is free to send work out (only needed in JIT).
if (!mlir::concretelang::dfr::_dfr_is_root_node())
mlir::concretelang::dfr::_dfr_startup_barrier->wait();
void _dfr_stop(int use_dfr_p) {
if (use_dfr_p) {
if (mlir::concretelang::dfr::num_nodes > 1) {
// Non-root nodes synchronize here with the root to mark the point
// where the root is free to send work out (only needed in JIT).
if (!mlir::concretelang::dfr::_dfr_is_root_node())
mlir::concretelang::dfr::_dfr_startup_barrier->wait();
// The barrier is only needed to synchronize the different
// computation phases when the compute nodes need to generate and
// register new work functions in each phase.
// The barrier is only needed to synchronize the different
// computation phases when the compute nodes need to generate and
// register new work functions in each phase.
// TODO: this barrier may be removed based on how work function
// registration is handled - but it is unlikely to result in much
// gain as the root node would be waiting for the end of computation
// on all remote nodes before reaching here anyway (dataflow
// dependences).
if (mlir::concretelang::dfr::_dfr_is_jit()) {
mlir::concretelang::dfr::_dfr_jit_phase_barrier->wait();
// TODO: this barrier may be removed based on how work function
// registration is handled - but it is unlikely to result in much
// gain as the root node would be waiting for the end of computation
// on all remote nodes before reaching here anyway (dataflow
// dependences).
if (mlir::concretelang::dfr::_dfr_is_jit()) {
mlir::concretelang::dfr::_dfr_jit_phase_barrier->wait();
}
mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager
->clearContext();
}
END_TIME(&mlir::concretelang::dfr::compute_timer, "Compute");
}
mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager
->clearContext();
END_TIME(&mlir::concretelang::dfr::whole_timer, "Total execution");
}
void _dfr_try_initialize() {
@@ -1266,6 +1288,7 @@ void _dfr_print_debug(size_t val) {
#else // CONCRETELANG_PARALLEL_EXECUTION_ENABLED
#include "concretelang/Runtime/DFRuntime.hpp"
#include "concretelang/Runtime/time_util.h"
namespace mlir {
namespace concretelang {
@@ -1273,6 +1296,7 @@ namespace dfr {
namespace {
static bool is_jit_p = false;
static bool use_omp_p = false;
static struct timespec compute_timer;
} // namespace
void _dfr_set_required(bool is_required) {}
@@ -1281,9 +1305,18 @@ void _dfr_set_use_omp(bool use_omp) { use_omp_p = use_omp; }
bool _dfr_is_jit() { return is_jit_p; }
bool _dfr_is_root_node() { return true; }
bool _dfr_use_omp() { return use_omp_p; }
} // namespace dfr
} // namespace concretelang
} // namespace mlir
// Build without parallel execution: the only work done here is recording
// the start of the compute phase in compute_timer (BEGIN_TIME expands to
// nothing when timing is disabled). The argument is not used.
void _dfr_start(int use_dfr_p) {
  (void)use_dfr_p;
  BEGIN_TIME(&mlir::concretelang::dfr::compute_timer);
}
// Build without parallel execution: the context pointer is not used and the
// call simply forwards to _dfr_start with the value 2.
void _dfr_start_c(void *ctx) {
  (void)ctx;
  _dfr_start(2);
}
// Build without parallel execution: reports the time elapsed since the
// matching _dfr_start under the label "Compute" (END_TIME expands to
// nothing when timing is disabled). The argument is not used.
void _dfr_stop(int use_dfr_p) {
  (void)use_dfr_p;
  END_TIME(&mlir::concretelang::dfr::compute_timer, "Compute");
}
// Build without parallel execution: this definition does nothing.
void _dfr_terminate() {}
#endif