mirror of
https://github.com/zama-ai/concrete.git
synced 2026-02-09 03:55:04 -05:00
feat(dfr): add timing measurements.
This commit is contained in:
@@ -76,6 +76,7 @@ endif()
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
option(CONCRETELANG_PARALLEL_EXECUTION_ENABLED "Enables parallel execution for ConcreteLang." ON)
|
||||
option(CONCRETELANG_TIMING_ENABLED "Enables execution timing." ON)
|
||||
|
||||
if(CONCRETELANG_PARALLEL_EXECUTION_ENABLED)
|
||||
message(STATUS "ConcreteLang parallel execution enabled.")
|
||||
@@ -92,6 +93,14 @@ else()
|
||||
message(STATUS "ConcreteLang parallel execution disabled.")
|
||||
endif()
|
||||
|
||||
# Execution timing: when enabled, pass -DCONCRETELANG_TIMING_ENABLED to every
# compilation so that the runtime's `#if CONCRETELANG_TIMING_ENABLED` sections
# are compiled in. The chosen state is reported in both cases, mirroring the
# parallel-execution option above.
if(CONCRETELANG_TIMING_ENABLED)
  add_compile_options(
    -DCONCRETELANG_TIMING_ENABLED
  )
  message(STATUS "ConcreteLang execution timing enabled.")
else()
  message(STATUS "ConcreteLang execution timing disabled.")
endif()
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Unit tests
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
@@ -2,6 +2,7 @@ BUILD_DIR=./build
|
||||
Python3_EXECUTABLE?=
|
||||
BINDINGS_PYTHON_ENABLED=ON
|
||||
PARALLEL_EXECUTION_ENABLED=OFF
|
||||
TIMING_ENABLED=OFF
|
||||
CC_COMPILER=
|
||||
CXX_COMPILER=
|
||||
|
||||
@@ -58,6 +59,7 @@ $(BUILD_DIR)/configured.stamp:
|
||||
-DMLIR_ENABLE_BINDINGS_PYTHON=$(BINDINGS_PYTHON_ENABLED) \
|
||||
-DCONCRETELANG_BINDINGS_PYTHON_ENABLED=$(BINDINGS_PYTHON_ENABLED) \
|
||||
-DCONCRETELANG_PARALLEL_EXECUTION_ENABLED=$(PARALLEL_EXECUTION_ENABLED) \
|
||||
-DCONCRETELANG_TIMING_ENABLED=$(TIMING_ENABLED) \
|
||||
-DCONCRETE_FFI_RELEASE=${CONCRETE_PROJECT}/target/release \
|
||||
-DHPX_DIR=${HPX_INSTALL_DIR}/lib/cmake/HPX \
|
||||
-DLLVM_EXTERNAL_PROJECTS=concretelang \
|
||||
|
||||
@@ -27,8 +27,8 @@ void _dfr_deallocate_future_data(void *);
|
||||
|
||||
/* Initialisation & termination. */
|
||||
void _dfr_start_c(void *);
|
||||
void _dfr_start();
|
||||
void _dfr_stop();
|
||||
void _dfr_start(int);
|
||||
void _dfr_stop(int);
|
||||
|
||||
void _dfr_terminate();
|
||||
}
|
||||
|
||||
95
compiler/include/concretelang/Runtime/time_util.h
Normal file
95
compiler/include/concretelang/Runtime/time_util.h
Normal file
@@ -0,0 +1,95 @@
|
||||
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
|
||||
// Exceptions. See
|
||||
// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
|
||||
// for license information.
|
||||
|
||||
#ifndef CONCRETELANG_DFR_TIME_UTIL_H
|
||||
#define CONCRETELANG_DFR_TIME_UTIL_H
|
||||
|
||||
#if CONCRETELANG_TIMING_ENABLED
|
||||
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <time.h>
|
||||
|
||||
#include "concretelang/Runtime/DFRuntime.hpp"
|
||||
|
||||
#define TIME_UTIL_CLOCK CLOCK_MONOTONIC
|
||||
|
||||
static inline int timespec_diff(struct timespec *, const struct timespec *,
|
||||
const struct timespec *);
|
||||
|
||||
/* Start a timer: store the current TIME_UTIL_CLOCK reading in the
   `struct timespec` pointed to by `p`.

   The clock_gettime() call is deliberately made outside of assert(): an
   assert() argument is not evaluated when NDEBUG is defined, which would
   otherwise leave the timer unset in release builds. */
#define BEGIN_TIME(p)                                                          \
  do {                                                                         \
    const int _begin_time_rc = clock_gettime(TIME_UTIL_CLOCK, (p));            \
    assert(_begin_time_rc == 0 && "clock_gettime failed");                     \
    (void)_begin_time_rc;                                                      \
  } while (0)
|
||||
|
||||
/* Prefix printed in front of every timing line. In parallel builds it
   identifies the node that produced the measurement; otherwise it is empty. */
#if CONCRETELANG_PARALLEL_EXECUTION_ENABLED
#define TIME_UTIL_PRINT_NODE_PREFIX()                                          \
  (std::cout << "[NODE \t" << _dfr_debug_get_node_id() << "] \t")
#else
#define TIME_UTIL_PRINT_NODE_PREFIX() ((void)0)
#endif

/* Stop a timer started with BEGIN_TIME(p) and print the elapsed time.

   p : pointer to the `struct timespec` filled in by BEGIN_TIME; it is
       overwritten with the elapsed time (now - *p).
   m : label streamed in front of " time : ".

   - clock_gettime() and timespec_diff() are called outside of assert() so
     that they still run when NDEBUG removes the assertions.
   - The nanosecond field is zero-padded to 9 digits; without padding an
     elapsed time of 1s + 5ms would be printed as "1.5000000".
   - The stream's fill character is restored after printing. */
#define END_TIME(p, m)                                                         \
  do {                                                                         \
    struct timespec _end_time_tv;                                              \
    const int _end_time_rc = clock_gettime(TIME_UTIL_CLOCK, &_end_time_tv);    \
    assert(_end_time_rc == 0 && "clock_gettime failed");                       \
    const int _end_time_neg = timespec_diff((p), &_end_time_tv, (p));          \
    assert(_end_time_neg == 0 && "elapsed time is negative");                  \
    (void)_end_time_rc;                                                        \
    (void)_end_time_neg;                                                       \
    TIME_UTIL_PRINT_NODE_PREFIX();                                             \
    std::cout << (m) << " time : \t" << (p)->tv_sec << ".";                    \
    const char _end_time_fill = std::cout.fill('0');                           \
    std::cout.width(9);                                                        \
    std::cout << (p)->tv_nsec;                                                 \
    std::cout.fill(_end_time_fill);                                            \
    std::cout << " seconds.\n" << std::flush;                                  \
  } while (0)
|
||||
|
||||
/* Return the CLOCK_THREAD_CPUTIME_ID reading for the calling thread,
   converted to seconds as a double.

   The clock is read outside of assert() so the call is not dropped when
   NDEBUG is defined; the timespec is zero-initialised so that a failed read
   in such a build yields 0.0 instead of an indeterminate value. */
static inline double get_thread_cpu_time(void) {
  struct timespec cpu_tv = {};
  const int rc = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &cpu_tv);
  assert(rc == 0 && "clock_gettime(CLOCK_THREAD_CPUTIME_ID) failed");
  (void)rc;

  double seconds = cpu_tv.tv_sec;
  seconds += cpu_tv.tv_nsec * 1e-9;
  return seconds;
}
|
||||
|
||||
/* Store (*_px - *_py) in *_result and report whether the difference is
   negative.

   Both inputs are copied before anything is written, so _result may point to
   the same object as _px or _py. For inputs whose tv_nsec lies in
   [0, 1e9) the resulting tv_nsec is non-negative.

   Returns 1 if *_px is earlier than *_py (negative difference), 0 otherwise. */
static inline int timespec_diff(struct timespec *_result,
                                const struct timespec *_px,
                                const struct timespec *_py) {
  const long kNsecPerSec = 1000000000L;
  const struct timespec minuend = *_px;
  struct timespec subtrahend = *_py;

  /* Borrow whole seconds into the subtrahend when its nanosecond part is the
     larger one, so the nanosecond subtraction below cannot go negative. */
  if (minuend.tv_nsec < subtrahend.tv_nsec) {
    const long borrowed =
        (subtrahend.tv_nsec - minuend.tv_nsec) / kNsecPerSec + 1;
    subtrahend.tv_nsec -= kNsecPerSec * borrowed;
    subtrahend.tv_sec += borrowed;
  }

  /* Conversely, hand back whole seconds if the nanosecond gap exceeds one
     second. */
  if (minuend.tv_nsec - subtrahend.tv_nsec > kNsecPerSec) {
    const long surplus =
        (minuend.tv_nsec - subtrahend.tv_nsec) / kNsecPerSec;
    subtrahend.tv_nsec += kNsecPerSec * surplus;
    subtrahend.tv_sec -= surplus;
  }

  _result->tv_sec = minuend.tv_sec - subtrahend.tv_sec;
  _result->tv_nsec = minuend.tv_nsec - subtrahend.tv_nsec;

  return (minuend.tv_sec < subtrahend.tv_sec) ? 1 : 0;
}
|
||||
|
||||
#else // CONCRETELANG_TIMING_ENABLED
|
||||
|
||||
#define BEGIN_TIME(p)
|
||||
#define END_TIME(p, m)
|
||||
|
||||
#endif // CONCRETELANG_TIMING_ENABLED
|
||||
#endif
|
||||
@@ -464,41 +464,40 @@ struct LowerDataflowTasksPass
|
||||
registerWorkFunction(entryPoint, wf);
|
||||
|
||||
// Issue _dfr_start/stop calls for this function
|
||||
if (!workFunctions.empty()) {
|
||||
OpBuilder builder(entryPoint.getBody());
|
||||
builder.setInsertionPointToStart(&entryPoint.getBody().front());
|
||||
OpBuilder builder(entryPoint.getBody());
|
||||
builder.setInsertionPointToStart(&entryPoint.getBody().front());
|
||||
int useDFR = (workFunctions.empty()) ? 0 : 1;
|
||||
Value useDFRVal = builder.create<arith::ConstantOp>(
|
||||
entryPoint.getLoc(), builder.getI64IntegerAttr(useDFR));
|
||||
|
||||
if (ctxIndex >= 0) {
|
||||
auto startFunTy =
|
||||
(dfr::_dfr_is_root_node())
|
||||
? mlir::FunctionType::get(
|
||||
entryPoint->getContext(),
|
||||
{entryPoint.getArgument(ctxIndex).getType()}, {})
|
||||
: mlir::FunctionType::get(entryPoint->getContext(), {}, {});
|
||||
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_start_c",
|
||||
startFunTy);
|
||||
builder.create<mlir::func::CallOp>(
|
||||
entryPoint.getLoc(), "_dfr_start_c", mlir::TypeRange(),
|
||||
(dfr::_dfr_is_root_node()) ? entryPoint.getArgument(ctxIndex)
|
||||
: mlir::ValueRange());
|
||||
} else {
|
||||
auto startFunTy =
|
||||
mlir::FunctionType::get(entryPoint->getContext(), {}, {});
|
||||
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_start",
|
||||
startFunTy);
|
||||
builder.create<mlir::func::CallOp>(entryPoint.getLoc(), "_dfr_start",
|
||||
mlir::TypeRange(),
|
||||
mlir::ValueRange());
|
||||
}
|
||||
builder.setInsertionPoint(entryPoint.getBody().back().getTerminator());
|
||||
auto stopFunTy =
|
||||
mlir::FunctionType::get(entryPoint->getContext(), {}, {});
|
||||
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_stop",
|
||||
stopFunTy);
|
||||
builder.create<mlir::func::CallOp>(entryPoint.getLoc(), "_dfr_stop",
|
||||
mlir::TypeRange(),
|
||||
mlir::ValueRange());
|
||||
if (ctxIndex >= 0) {
|
||||
auto startFunTy =
|
||||
(dfr::_dfr_is_root_node())
|
||||
? mlir::FunctionType::get(
|
||||
entryPoint->getContext(),
|
||||
{entryPoint.getArgument(ctxIndex).getType()}, {})
|
||||
: mlir::FunctionType::get(entryPoint->getContext(), {}, {});
|
||||
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_start_c",
|
||||
startFunTy);
|
||||
builder.create<mlir::func::CallOp>(
|
||||
entryPoint.getLoc(), "_dfr_start_c", mlir::TypeRange(),
|
||||
(dfr::_dfr_is_root_node()) ? entryPoint.getArgument(ctxIndex)
|
||||
: mlir::ValueRange());
|
||||
} else {
|
||||
auto startFunTy = mlir::FunctionType::get(entryPoint->getContext(),
|
||||
{useDFRVal.getType()}, {});
|
||||
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_start",
|
||||
startFunTy);
|
||||
builder.create<mlir::func::CallOp>(entryPoint.getLoc(), "_dfr_start",
|
||||
mlir::TypeRange(), useDFRVal);
|
||||
}
|
||||
builder.setInsertionPoint(entryPoint.getBody().back().getTerminator());
|
||||
auto stopFunTy = mlir::FunctionType::get(entryPoint->getContext(),
|
||||
{useDFRVal.getType()}, {});
|
||||
(void)insertForwardDeclaration(entryPoint, builder, "_dfr_stop",
|
||||
stopFunTy);
|
||||
builder.create<mlir::func::CallOp>(entryPoint.getLoc(), "_dfr_stop",
|
||||
mlir::TypeRange(), useDFRVal);
|
||||
}
|
||||
}
|
||||
LowerDataflowTasksPass(bool debug) : debug(debug){};
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "concretelang/Runtime/DFRuntime.hpp"
|
||||
#include "concretelang/Runtime/distributed_generic_task_server.hpp"
|
||||
#include "concretelang/Runtime/runtime_api.h"
|
||||
#include "concretelang/Runtime/time_util.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
@@ -31,6 +32,7 @@ static std::vector<GenericComputeClient> gcc;
|
||||
static hpx::lcos::barrier *_dfr_jit_phase_barrier;
|
||||
static hpx::lcos::barrier *_dfr_startup_barrier;
|
||||
static size_t num_nodes = 0;
|
||||
static struct timespec init_timer, broadcast_timer, compute_timer, whole_timer;
|
||||
} // namespace
|
||||
} // namespace dfr
|
||||
} // namespace concretelang
|
||||
@@ -1129,72 +1131,92 @@ static inline void _dfr_start_impl(int argc, char *argv[]) {
|
||||
/* Start/stop functions to be called from within user code (or during
|
||||
JIT invocation). These serve to pause/resume the runtime
|
||||
scheduler and to clean up used resources. */
|
||||
void _dfr_start() {
|
||||
// The first invocation will initialise the runtime. As each call to
|
||||
// _dfr_start is matched with _dfr_stop, if this is not the first,
|
||||
// we need to resume the HPX runtime.
|
||||
assert(
|
||||
mlir::concretelang::dfr::init_guard !=
|
||||
mlir::concretelang::dfr::terminated &&
|
||||
"DFR runtime: attempting to start runtime after it has been terminated");
|
||||
uint64_t expected = mlir::concretelang::dfr::uninitialised;
|
||||
if (mlir::concretelang::dfr::init_guard.compare_exchange_strong(
|
||||
expected, mlir::concretelang::dfr::active))
|
||||
_dfr_start_impl(0, nullptr);
|
||||
void _dfr_start(int use_dfr_p) {
|
||||
BEGIN_TIME(&mlir::concretelang::dfr::whole_timer);
|
||||
if (use_dfr_p) {
|
||||
BEGIN_TIME(&mlir::concretelang::dfr::init_timer);
|
||||
// The first invocation will initialise the runtime. As each call to
|
||||
// _dfr_start is matched with _dfr_stop, if this is not the first,
|
||||
// we need to resume the HPX runtime.
|
||||
assert(mlir::concretelang::dfr::init_guard !=
|
||||
mlir::concretelang::dfr::terminated &&
|
||||
"DFR runtime: attempting to start runtime after it has been "
|
||||
"terminated");
|
||||
uint64_t expected = mlir::concretelang::dfr::uninitialised;
|
||||
if (mlir::concretelang::dfr::init_guard.compare_exchange_strong(
|
||||
expected, mlir::concretelang::dfr::active))
|
||||
_dfr_start_impl(0, nullptr);
|
||||
END_TIME(&mlir::concretelang::dfr::init_timer, "Initialization");
|
||||
|
||||
assert(mlir::concretelang::dfr::init_guard ==
|
||||
mlir::concretelang::dfr::active &&
|
||||
"DFR runtime failed to initialise");
|
||||
assert(mlir::concretelang::dfr::init_guard ==
|
||||
mlir::concretelang::dfr::active &&
|
||||
"DFR runtime failed to initialise");
|
||||
|
||||
// If this is not the root node in a non-JIT execution, then this
|
||||
// node should only run the scheduler for any incoming work until
|
||||
// termination is flagged. If this is JIT, we need to run the
|
||||
// cancelled function which registers the work functions.
|
||||
if (!mlir::concretelang::dfr::_dfr_is_root_node() &&
|
||||
!mlir::concretelang::dfr::_dfr_is_jit())
|
||||
_dfr_stop_impl();
|
||||
if (use_dfr_p == 1) {
|
||||
BEGIN_TIME(&mlir::concretelang::dfr::compute_timer);
|
||||
}
|
||||
|
||||
// If this is not the root node in a non-JIT execution, then this
|
||||
// node should only run the scheduler for any incoming work until
|
||||
// termination is flagged. If this is JIT, we need to run the
|
||||
// cancelled function which registers the work functions.
|
||||
if (!mlir::concretelang::dfr::_dfr_is_root_node() &&
|
||||
!mlir::concretelang::dfr::_dfr_is_jit())
|
||||
_dfr_stop_impl();
|
||||
}
|
||||
}
|
||||
|
||||
// Startup entry point when a RuntimeContext is used
|
||||
void _dfr_start_c(void *ctx) {
|
||||
_dfr_start();
|
||||
_dfr_start(2);
|
||||
|
||||
new mlir::concretelang::dfr::RuntimeContextManager();
|
||||
mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager->setContext(
|
||||
ctx);
|
||||
if (mlir::concretelang::dfr::num_nodes > 1) {
|
||||
BEGIN_TIME(&mlir::concretelang::dfr::broadcast_timer);
|
||||
new mlir::concretelang::dfr::RuntimeContextManager();
|
||||
mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager
|
||||
->setContext(ctx);
|
||||
|
||||
// If this is not JIT, then the remote nodes never reach _dfr_stop,
|
||||
// so root should not instantiate this barrier.
|
||||
if (mlir::concretelang::dfr::_dfr_is_root_node() &&
|
||||
mlir::concretelang::dfr::_dfr_is_jit())
|
||||
mlir::concretelang::dfr::_dfr_startup_barrier->wait();
|
||||
// If this is not JIT, then the remote nodes never reach _dfr_stop,
|
||||
// so root should not instantiate this barrier.
|
||||
if (mlir::concretelang::dfr::_dfr_is_root_node() &&
|
||||
mlir::concretelang::dfr::_dfr_is_jit())
|
||||
mlir::concretelang::dfr::_dfr_startup_barrier->wait();
|
||||
END_TIME(&mlir::concretelang::dfr::broadcast_timer, "Key broadcasting");
|
||||
}
|
||||
BEGIN_TIME(&mlir::concretelang::dfr::compute_timer);
|
||||
}
|
||||
|
||||
// This function cannot be used to terminate the runtime as it is
|
||||
// non-decidable if another computation phase will follow. Instead the
|
||||
// _dfr_terminate function provides this facility and is normally
|
||||
// called on exit from "main" when not using the main wrapper library.
|
||||
void _dfr_stop() {
|
||||
// Non-root nodes synchronize here with the root to mark the point
|
||||
// where the root is free to send work out (only needed in JIT).
|
||||
if (!mlir::concretelang::dfr::_dfr_is_root_node())
|
||||
mlir::concretelang::dfr::_dfr_startup_barrier->wait();
|
||||
void _dfr_stop(int use_dfr_p) {
|
||||
if (use_dfr_p) {
|
||||
if (mlir::concretelang::dfr::num_nodes > 1) {
|
||||
// Non-root nodes synchronize here with the root to mark the point
|
||||
// where the root is free to send work out (only needed in JIT).
|
||||
if (!mlir::concretelang::dfr::_dfr_is_root_node())
|
||||
mlir::concretelang::dfr::_dfr_startup_barrier->wait();
|
||||
|
||||
// The barrier is only needed to synchronize the different
|
||||
// computation phases when the compute nodes need to generate and
|
||||
// register new work functions in each phase.
|
||||
// The barrier is only needed to synchronize the different
|
||||
// computation phases when the compute nodes need to generate and
|
||||
// register new work functions in each phase.
|
||||
|
||||
// TODO: this barrier may be removed based on how work function
|
||||
// registration is handled - but it is unlikely to result in much
|
||||
// gain as the root node would be waiting for the end of computation
|
||||
// on all remote nodes before reaching here anyway (dataflow
|
||||
// dependences).
|
||||
if (mlir::concretelang::dfr::_dfr_is_jit()) {
|
||||
mlir::concretelang::dfr::_dfr_jit_phase_barrier->wait();
|
||||
// TODO: this barrier may be removed based on how work function
|
||||
// registration is handled - but it is unlikely to result in much
|
||||
// gain as the root node would be waiting for the end of computation
|
||||
// on all remote nodes before reaching here anyway (dataflow
|
||||
// dependences).
|
||||
if (mlir::concretelang::dfr::_dfr_is_jit()) {
|
||||
mlir::concretelang::dfr::_dfr_jit_phase_barrier->wait();
|
||||
}
|
||||
|
||||
mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager
|
||||
->clearContext();
|
||||
}
|
||||
END_TIME(&mlir::concretelang::dfr::compute_timer, "Compute");
|
||||
}
|
||||
|
||||
mlir::concretelang::dfr::_dfr_node_level_runtime_context_manager
|
||||
->clearContext();
|
||||
END_TIME(&mlir::concretelang::dfr::whole_timer, "Total execution");
|
||||
}
|
||||
|
||||
void _dfr_try_initialize() {
|
||||
@@ -1266,6 +1288,7 @@ void _dfr_print_debug(size_t val) {
|
||||
#else // CONCRETELANG_PARALLEL_EXECUTION_ENABLED
|
||||
|
||||
#include "concretelang/Runtime/DFRuntime.hpp"
|
||||
#include "concretelang/Runtime/time_util.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
@@ -1273,6 +1296,7 @@ namespace dfr {
|
||||
namespace {
|
||||
static bool is_jit_p = false;
|
||||
static bool use_omp_p = false;
|
||||
static struct timespec compute_timer;
|
||||
} // namespace
|
||||
|
||||
void _dfr_set_required(bool is_required) {}
|
||||
@@ -1281,9 +1305,18 @@ void _dfr_set_use_omp(bool use_omp) { use_omp_p = use_omp; }
|
||||
bool _dfr_is_jit() { return is_jit_p; }
|
||||
bool _dfr_is_root_node() { return true; }
|
||||
bool _dfr_use_omp() { return use_omp_p; }
|
||||
|
||||
} // namespace dfr
|
||||
} // namespace concretelang
|
||||
} // namespace mlir
|
||||
|
||||
// Variant of _dfr_start used when CONCRETELANG_PARALLEL_EXECUTION_ENABLED is
// off: it only records the start of the compute phase in compute_timer.
// `use_dfr_p` is not read here, and BEGIN_TIME expands to nothing when
// CONCRETELANG_TIMING_ENABLED is off, making this a no-op in that case.
void _dfr_start(int use_dfr_p) {
|
||||
BEGIN_TIME(&mlir::concretelang::dfr::compute_timer);
|
||||
}
|
||||
// Non-parallel variant of the context-taking entry point: `ctx` is not used,
// the call is simply forwarded to _dfr_start with the value 2.
void _dfr_start_c(void *ctx) { _dfr_start(2); }
|
||||
// Variant of _dfr_stop used when CONCRETELANG_PARALLEL_EXECUTION_ENABLED is
// off: it ends the measurement started in _dfr_start and reports it under the
// "Compute" label. `use_dfr_p` is not read here, and END_TIME expands to
// nothing when CONCRETELANG_TIMING_ENABLED is off.
void _dfr_stop(int use_dfr_p) {
|
||||
END_TIME(&mlir::concretelang::dfr::compute_timer, "Compute");
|
||||
}
|
||||
|
||||
void _dfr_terminate() {}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user