From 72ada5ae6c5bcc53045eba559666a65a68149e4c Mon Sep 17 00:00:00 2001 From: "Peyton, Jonathan L" Date: Mon, 7 Sep 2020 00:37:36 -0500 Subject: [PATCH] [OpenMP] Introduce GOMP mutexinoutset in the runtime Encapsulate GOMP task dependencies in separate class and introduce the new mutexinoutset dependency type. This separate class allows future GOMP task APIs easier access to the task dependency functionality and better ability to propagate new dependency types to all existing GOMP task APIs which use task dependencies. Differential Revision: https://reviews.llvm.org/D87267 --- openmp/runtime/src/i18n/en_US.txt | 1 + openmp/runtime/src/kmp_gsupport.cpp | 74 +++++++++++++-- .../runtime/test/tasking/omp50_task_depend_mtx3.c | 102 +++++++++++++++++++++ 3 files changed, 168 insertions(+), 9 deletions(-) create mode 100644 openmp/runtime/test/tasking/omp50_task_depend_mtx3.c diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt index b2ba63c0..97f2ca7 100644 --- a/openmp/runtime/src/i18n/en_US.txt +++ b/openmp/runtime/src/i18n/en_US.txt @@ -426,6 +426,7 @@ AffHWSubsetManyProcs "KMP_HW_SUBSET ignored: too many Procs requested." HierSchedInvalid "Hierarchy ignored: unsupported level: %1$s." AffFormatDefault "OMP: pid %1$s tid %2$s thread %3$s bound to OS proc set {%4$s}" APIDeprecated "%1$s routine deprecated, please use %2$s instead." +GompFeatureNotSupported "libgomp compatibility layer does not support OpenMP feature: %1$s" # -------------------------------------------------------------------------------------------------- -*- HINTS -*- diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp index f4f1994..b2d1419 100644 --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -17,6 +17,67 @@ #include "ompt-specific.h" #endif +// This class helps convert gomp dependency info into +// kmp_depend_info_t structures +class kmp_gomp_depends_info_t { + void **depend; + kmp_int32 num_deps; + size_t num_out, num_mutexinout, num_in; + size_t offset; + +public: + kmp_gomp_depends_info_t(void **depend) : depend(depend) { + size_t ndeps = (kmp_intptr_t)depend[0]; + size_t num_doable; + // GOMP taskdep structure: + // if depend[0] != 0: + // depend = [ ndeps | nout | &out | ... | &out | &in | ... | &in ] + // + // if depend[0] == 0: + // depend = [ 0 | ndeps | nout | nmtx | nin | &out | ... | &out | &mtx | + // ... | &mtx | &in | ... | &in | &depobj | ... | &depobj ] + if (ndeps) { + num_out = (kmp_intptr_t)depend[1]; + num_in = ndeps - num_out; + num_mutexinout = 0; + num_doable = ndeps; + offset = 2; + } else { + ndeps = (kmp_intptr_t)depend[1]; + num_out = (kmp_intptr_t)depend[2]; + num_mutexinout = (kmp_intptr_t)depend[3]; + num_in = (kmp_intptr_t)depend[4]; + num_doable = num_out + num_mutexinout + num_in; + offset = 5; + } + // TODO: Support gomp depobj + if (ndeps != num_doable) { + KMP_FATAL(GompFeatureNotSupported, "depobj"); + } + num_deps = static_cast(ndeps); + } + kmp_int32 get_num_deps() const { return num_deps; } + kmp_depend_info_t get_kmp_depend(size_t index) const { + kmp_depend_info_t retval; + memset(&retval, '\0', sizeof(retval)); + KMP_ASSERT(index < (size_t)num_deps); + retval.base_addr = (kmp_intptr_t)depend[offset + index]; + retval.len = 0; + // Because inout and out are logically equivalent, + // use inout and in dependency flags. GOMP does not provide a + // way to distinguish if user specified out vs. inout. + if (index < num_out) { + retval.flags.in = 1; + retval.flags.out = 1; + } else if (index >= num_out && index < (num_out + num_mutexinout)) { + retval.flags.mtx = 1; + } else { + retval.flags.in = 1; + } + return retval; + } +}; + #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -1164,16 +1225,11 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, if (if_cond) { if (gomp_flags & 8) { KMP_ASSERT(depend); - const size_t ndeps = (kmp_intptr_t)depend[0]; - const size_t nout = (kmp_intptr_t)depend[1]; + kmp_gomp_depends_info_t gomp_depends(depend); + kmp_int32 ndeps = gomp_depends.get_num_deps(); kmp_depend_info_t dep_list[ndeps]; - - for (size_t i = 0U; i < ndeps; i++) { - dep_list[i].base_addr = (kmp_intptr_t)depend[2U + i]; - dep_list[i].len = 0U; - dep_list[i].flags.in = 1; - dep_list[i].flags.out = (i < nout); - } + for (kmp_int32 i = 0; i < ndeps; i++) + dep_list[i] = gomp_depends.get_kmp_depend(i); __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL); } else { __kmpc_omp_task(&loc, gtid, task); diff --git a/openmp/runtime/test/tasking/omp50_task_depend_mtx3.c b/openmp/runtime/test/tasking/omp50_task_depend_mtx3.c new file mode 100644 index 0000000..1674899 --- /dev/null +++ b/openmp/runtime/test/tasking/omp50_task_depend_mtx3.c @@ -0,0 +1,102 @@ +// RUN: %libomp-compile-and-run +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8 +// UNSUPPORTED: clang-3, clang-4, clang-5, clang-6, clang-7, clang-8 +// TODO: update expected result when icc supports mutexinoutset +// XFAIL: icc + +// Tests OMP 5.0 task dependences "mutexinoutset", emulates compiler codegen +// Mutually exclusive tasks get same input dependency info array +// +// Task tree created: +// task0 task1 +// \ / \ +// task2 task5 +// / \ +// task3 task4 +// / \ +// task6 <-->task7 (these two are mutually exclusive) +// \ / +// task8 +// +#include +#include +#include "omp_my_sleep.h" + +static int checker = 0; // to check if two tasks run simultaneously +static int err = 0; +#ifndef DELAY +#define DELAY 0.1 +#endif + +int mutex_task(int task_id) { + int th = omp_get_thread_num(); + #pragma omp atomic + ++checker; + printf("task %d, th %d\n", task_id, th); + if (checker != 1) { + err++; + printf("Error1, checker %d != 1\n", checker); + } + my_sleep(DELAY); + if (checker != 1) { + err++; + printf("Error2, checker %d != 1\n", checker); + } + #pragma omp atomic + --checker; + return 0; +} + +int main() +{ + int i1,i2,i3,i4; + omp_set_num_threads(2); + #pragma omp parallel + { + #pragma omp single nowait + { + int t = omp_get_thread_num(); + #pragma omp task depend(in: i1, i2) + { int th = omp_get_thread_num(); + printf("task 0_%d, th %d\n", t, th); + my_sleep(DELAY); } + #pragma omp task depend(in: i1, i3) + { int th = omp_get_thread_num(); + printf("task 1_%d, th %d\n", t, th); + my_sleep(DELAY); } + #pragma omp task depend(in: i2) depend(out: i1) + { int th = omp_get_thread_num(); + printf("task 2_%d, th %d\n", t, th); + my_sleep(DELAY); } + #pragma omp task depend(in: i1) + { int th = omp_get_thread_num(); + printf("task 3_%d, th %d\n", t, th); + my_sleep(DELAY); } + #pragma omp task depend(out: i2) + { int th = omp_get_thread_num(); + printf("task 4_%d, th %d\n", t, th); + my_sleep(DELAY+0.1); } // wait a bit longer than task 3 + #pragma omp task depend(out: i3) + { int th = omp_get_thread_num(); + printf("task 5_%d, th %d\n", t, th); + my_sleep(DELAY); } + + #pragma omp task depend(mutexinoutset: i1, i4) + { mutex_task(6); } + #pragma omp task depend(mutexinoutset: i1, i4) + { mutex_task(7); } + + #pragma omp task depend(in: i1) + { int th = omp_get_thread_num(); + printf("task 8_%d, th %d\n", t, th); + my_sleep(DELAY); } + } // single + } // parallel + if (err == 0) { + printf("passed\n"); + return 0; + } else { + printf("failed\n"); + return 1; + } +} -- 2.7.4