From a23806e67a2d65196aacc6fd7243d228b4852c81 Mon Sep 17 00:00:00 2001 From: Andrey Churbanov Date: Tue, 2 Jul 2019 15:10:20 +0000 Subject: [PATCH] Create a runtime option to disable task throttling. Patch by viroulep (Philippe Virouleau) Differential Revision: https://reviews.llvm.org/D63196 llvm-svn: 364934 --- openmp/runtime/src/kmp.h | 1 + openmp/runtime/src/kmp_global.cpp | 1 + openmp/runtime/src/kmp_settings.cpp | 16 +++++++ openmp/runtime/src/kmp_tasking.cpp | 6 ++- openmp/runtime/test/tasking/omp_fill_taskqueue.c | 60 ++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 openmp/runtime/test/tasking/omp_fill_taskqueue.c diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index e9b343c..38a16d1 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -2121,6 +2121,7 @@ typedef enum kmp_tasking_mode { extern kmp_tasking_mode_t __kmp_tasking_mode; /* determines how/when to execute tasks */ extern int __kmp_task_stealing_constraint; +extern int __kmp_enable_task_throttling; #if OMP_40_ENABLED extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if // specified, defaults to 0 otherwise diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp index f766516..1cfb62d 100644 --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -341,6 +341,7 @@ omp_memspace_handle_t const omp_low_lat_mem_space = KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4); int __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */ +int __kmp_enable_task_throttling = 1; #ifdef DEBUG_SUSPEND int __kmp_suspend_count = 0; diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index 1afba5b..f5781d4 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -4683,6 +4683,20 @@ static void __kmp_stg_print_forkjoin_frames_mode(kmp_str_buf_t *buffer, #endif /* USE_ITT_BUILD */ // ----------------------------------------------------------------------------- +// KMP_ENABLE_TASK_THROTTLING + +static void __kmp_stg_parse_task_throttling(char const *name, + char const *value, void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_enable_task_throttling); +} // __kmp_stg_parse_task_throttling + + +static void __kmp_stg_print_task_throttling(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_enable_task_throttling); +} // __kmp_stg_print_task_throttling + +// ----------------------------------------------------------------------------- // OMP_DISPLAY_ENV #if OMP_40_ENABLED @@ -5003,6 +5017,8 @@ static kmp_setting_t __kmp_stg_table[] = { {"KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode, __kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0}, #endif + {"KMP_ENABLE_TASK_THROTTLING", __kmp_stg_parse_task_throttling, + __kmp_stg_print_task_throttling, NULL, 0, 0}, #if OMP_40_ENABLED {"OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env, diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 62f5f7a..23ace2f 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -374,7 +374,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) { // Check if deque is full if (TCR_4(thread_data->td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td)) { - if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata, + if (__kmp_enable_task_throttling && + __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata, thread->th.th_current_task)) { KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning " "TASK_NOT_PUSHED for task %p\n", @@ -394,7 +395,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) { // Need to recheck as we can get a proxy task from thread outside of OpenMP if (TCR_4(thread_data->td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td)) { - if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata, + if (__kmp_enable_task_throttling && + __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata, thread->th.th_current_task)) { __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; " diff --git a/openmp/runtime/test/tasking/omp_fill_taskqueue.c b/openmp/runtime/test/tasking/omp_fill_taskqueue.c new file mode 100644 index 0000000..e95f97a --- /dev/null +++ b/openmp/runtime/test/tasking/omp_fill_taskqueue.c @@ -0,0 +1,60 @@ +// RUN: %libomp-compile && env KMP_ENABLE_TASK_THROTTLING=0 %libomp-run +// RUN: %libomp-compile && env KMP_ENABLE_TASK_THROTTLING=1 %libomp-run + +#include +#include +#include + +/** + * Test the task throttling behavior of the runtime. + * Unless OMP_NUM_THREADS is 1, the master thread pushes tasks to its own tasks + * queue until either of the following happens: + * - the task queue is full, and it starts serializing tasks + * - all tasks have been pushed, and it can begin execution + * The idea is to create a huge number of tasks which execution are blocked + * until the master thread comes to execute tasks (they need to be blocking, + * otherwise the second thread will start emptying the queue). + * At this point we can check the number of enqueued tasks: iff all tasks have + * been enqueued, then there was no task throttling. + * Otherwise there has been some sort of task throttling. + * If what we detect doesn't match the value of the environment variable, the + * test is failed. + */ + + +#define NUM_TASKS 2000 + + +int main() +{ + int i; + int block = 1; + int tid; + int throttling = strcmp(getenv("KMP_ENABLE_TASK_THROTTLING"), "1") == 0; + int enqueued = 0; + int failed = -1; + + #pragma omp parallel num_threads(2) + #pragma omp master + { + for (i = 0; i < NUM_TASKS; i++) { + enqueued++; + #pragma omp task + { + tid = omp_get_thread_num(); + if (tid == 0) { + // As soon as the master thread starts executing task we should unlock + // all tasks, and detect the test failure if it has not been done yet. + if (failed < 0) + failed = throttling ? enqueued == NUM_TASKS : enqueued < NUM_TASKS; + block = 0; + } + while (block) + ; + } + } + block = 0; + } + + return failed; +} -- 2.7.4