From 5644f734d6068f6e75ecd9856e5f837190543667 Mon Sep 17 00:00:00 2001 From: AndreyChurbanov Date: Fri, 20 Nov 2020 12:15:00 +0300 Subject: [PATCH] Revert "[OpenMP] Add support for Intel's umonitor/umwait" This reverts commit 9cfad5f9c5bfd985f1bc8b0954f58013c5236e58. --- openmp/runtime/src/i18n/en_US.txt | 2 - openmp/runtime/src/kmp.h | 179 +++++-------------------- openmp/runtime/src/kmp_barrier.cpp | 55 ++++---- openmp/runtime/src/kmp_global.cpp | 7 - openmp/runtime/src/kmp_os.h | 10 -- openmp/runtime/src/kmp_runtime.cpp | 52 +------- openmp/runtime/src/kmp_settings.cpp | 35 ----- openmp/runtime/src/kmp_stats.h | 1 - openmp/runtime/src/kmp_taskdeps.cpp | 3 +- openmp/runtime/src/kmp_tasking.cpp | 47 ++----- openmp/runtime/src/kmp_wait_release.cpp | 26 +--- openmp/runtime/src/kmp_wait_release.h | 217 ++++++++----------------------- openmp/runtime/src/z_Linux_util.cpp | 46 ++++--- openmp/runtime/src/z_Windows_NT_util.cpp | 34 ++--- 14 files changed, 172 insertions(+), 542 deletions(-) diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt index 26f4cf5..0b08442 100644 --- a/openmp/runtime/src/i18n/en_US.txt +++ b/openmp/runtime/src/i18n/en_US.txt @@ -417,8 +417,6 @@ AffUsingHwloc "%1$s: Affinity capable, using hwloc." AffIgnoringHwloc "%1$s: Ignoring hwloc mechanism." AffHwlocErrorOccurred "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms." EnvSerialWarn "%1$s must be set prior to OpenMP runtime library initialization; ignored." -EnvMwaitWarn "You have enabled the use of umonitor/umwait. If the CPU doesn't have that enabled " - "you'll get an illegal instruction exception." EnvVarDeprecated "%1$s variable deprecated, please use %2$s instead." RedMethodNotSupported "KMP_FORCE_REDUCTION: %1$s method is not supported; using critical." 
AffHWSubsetNoHWLOC "KMP_HW_SUBSET ignored: unsupported item requested for non-HWLOC topology method (KMP_TOPOLOGY_METHOD)" diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index c2e30a3..3acee73 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -255,10 +255,6 @@ typedef union kmp_team kmp_team_p; typedef union kmp_info kmp_info_p; typedef union kmp_root kmp_root_p; -template class kmp_flag_32; -template class kmp_flag_64; -class kmp_flag_oncore; - #ifdef __cplusplus extern "C" { #endif @@ -1322,96 +1318,6 @@ static inline void __kmp_x86_pause(void) { _mm_pause(); } } \ } -// User-level Monitor/Mwait -#if KMP_HAVE_UMWAIT -// We always try for UMWAIT first -#if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) || \ - (KMP_COMPILER_MSVC && _MSC_VER >= 1700) || \ - (KMP_COMPILER_CLANG && (KMP_MSVC_COMPAT || __MINGW32__)) || \ - (KMP_COMPILER_GCC && __MINGW32__) -#if KMP_OS_UNIX -#include -#else -#include -#endif // KMP_OS_UNIX -#else -#define USE_MWAIT_ASM \ - KMP_OS_UNIX && (!KMP_COMPILER_ICC || __INTEL_COMPILER < 1900) -#endif // KMP_COMPILER_ICC etc. 
-#if KMP_OS_UNIX && 0 // "waitpkg" not recognized yet -__attribute__((target("waitpkg"))) -#endif -static inline int -__kmp_tpause(uint32_t hint, uint64_t counter) { -#if (USE_MWAIT_ASM) - uint32_t timeHi = uint32_t(counter >> 32); - uint32_t timeLo = uint32_t(counter & 0xffffffff); - char flag; - __asm__ volatile("#tpause\n.byte 0x66, 0x0F, 0xAE, 0xF1\n" - "setb %0" - : "=r"(flag) - : "a"(timeLo), "d"(timeHi), "c"(hint) - :); - return flag; -#else - return _tpause(hint, counter); -#endif -} -#if KMP_OS_UNIX && 0 // "waitpkg" not recognized on our build machine -__attribute__((target("waitpkg"))) -#endif -static inline void -__kmp_umonitor(void *cacheline) { -#if (USE_MWAIT_ASM) - __asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 " - : - : "a"(cacheline) - :); -#else - _umonitor(cacheline); -#endif -} -#if KMP_OS_UNIX && 0 // "waitpkg" not recognized on our build machine -__attribute__((target("waitpkg"))) -#endif -static inline int -__kmp_umwait(uint32_t hint, uint64_t counter) { -#if (USE_MWAIT_ASM) - uint32_t timeHi = uint32_t(counter >> 32); - uint32_t timeLo = uint32_t(counter & 0xffffffff); - char flag; - __asm__ volatile("#umwait\n.byte 0xF2, 0x0F, 0xAE, 0xF1\n" - "setb %0" - : "=r"(flag) - : "a"(timeLo), "d"(timeHi), "c"(hint) - :); - return flag; -#else - return _umwait(hint, counter); -#endif -} -#elif KMP_HAVE_MWAIT -#if KMP_OS_UNIX -#include -#else -#include -#endif -#if KMP_OS_UNIX -__attribute__((target("sse3"))) -#endif -static inline void -__kmp_mm_monitor(void *cacheline, unsigned extensions, unsigned hints) { - _mm_monitor(cacheline, extensions, hints); -} -#if KMP_OS_UNIX -__attribute__((target("sse3"))) -#endif -static inline void -__kmp_mm_mwait(unsigned extensions, unsigned hints) { - _mm_mwait(extensions, hints); -} -#endif // KMP_HAVE_UMWAIT - /* ------------------------------------------------------------------------ */ /* Support datatypes for the orphaned construct nesting checks. 
*/ /* ------------------------------------------------------------------------ */ @@ -3188,13 +3094,6 @@ static inline void __kmp_assert_valid_gtid(kmp_int32 gtid) { KMP_FATAL(ThreadIdentInvalid); } -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT -extern int __kmp_user_level_mwait; // TRUE or FALSE; from KMP_USER_LEVEL_MWAIT -extern int __kmp_umwait_enabled; // Runtime check if user-level mwait enabled -extern int __kmp_mwait_enabled; // Runtime check if ring3 mwait is enabled -extern int __kmp_mwait_hints; // Hints to pass in to mwait -#endif - /* ------------------------------------------------------------------------- */ extern kmp_global_t __kmp_global; /* global status */ @@ -3396,14 +3295,17 @@ extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker, extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32), void *obj); -extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag, +class kmp_flag_32; +class kmp_flag_64; +class kmp_flag_oncore; +extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin #if USE_ITT_BUILD , void *itt_sync_obj #endif ); -extern void __kmp_release_64(kmp_flag_64<> *flag); +extern void __kmp_release_64(kmp_flag_64 *flag); extern void __kmp_infinite_loop(void); @@ -3501,6 +3403,13 @@ extern int __kmp_try_suspend_mx(kmp_info_t *th); extern void __kmp_lock_suspend_mx(kmp_info_t *th); extern void __kmp_unlock_suspend_mx(kmp_info_t *th); +extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag); +extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag); +extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag); +extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag); +extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag); +extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag); + extern void __kmp_elapsed(double *); extern void __kmp_elapsed_tick(double *); @@ -3625,6 +3534,28 @@ extern 
kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, kmp_task_t *task); extern void __kmp_fulfill_event(kmp_event_t *event); +int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, + kmp_flag_32 *flag, int final_spin, + int *thread_finished, +#if USE_ITT_BUILD + void *itt_sync_obj, +#endif /* USE_ITT_BUILD */ + kmp_int32 is_constrained); +int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, + kmp_flag_64 *flag, int final_spin, + int *thread_finished, +#if USE_ITT_BUILD + void *itt_sync_obj, +#endif /* USE_ITT_BUILD */ + kmp_int32 is_constrained); +int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, + kmp_flag_oncore *flag, int final_spin, + int *thread_finished, +#if USE_ITT_BUILD + void *itt_sync_obj, +#endif /* USE_ITT_BUILD */ + kmp_int32 is_constrained); + extern void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team); extern void __kmp_reap_task_teams(void); @@ -3988,46 +3919,4 @@ extern void __kmp_omp_display_env(int verbose); } #endif -template -extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag); -template -extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag); -extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag); -template -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT -extern void __kmp_mwait_32(int th_gtid, kmp_flag_32 *flag); -template -extern void __kmp_mwait_64(int th_gtid, kmp_flag_64 *flag); -extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag); -template -#endif -extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag); -template -extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag); -extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag); - -template -int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, - kmp_flag_32 *flag, int final_spin, - int *thread_finished, -#if USE_ITT_BUILD - void *itt_sync_obj, -#endif /* USE_ITT_BUILD */ - kmp_int32 is_constrained); -template -int 
__kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, - kmp_flag_64 *flag, int final_spin, - int *thread_finished, -#if USE_ITT_BUILD - void *itt_sync_obj, -#endif /* USE_ITT_BUILD */ - kmp_int32 is_constrained); -int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, - kmp_flag_oncore *flag, int final_spin, - int *thread_finished, -#if USE_ITT_BUILD - void *itt_sync_obj, -#endif /* USE_ITT_BUILD */ - kmp_int32 is_constrained); - #endif /* KMP_H */ diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index c4b8e11..de66128 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -78,7 +78,7 @@ static bool __kmp_linear_barrier_gather_template( is valid any more - it could be deallocated by the master thread at any time. */ ANNOTATE_BARRIER_BEGIN(this_thr); - kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[0]); + kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]); flag.release(); } else { kmp_balign_team_t *team_bar = &team->t.t_bar[bt]; @@ -101,14 +101,14 @@ static bool __kmp_linear_barrier_gather_template( &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state)); // Wait for worker thread to arrive + kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, + new_state); if (cancellable) { - kmp_flag_64 flag( - &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state); - if (flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj))) + bool cancelled = flag.wait_cancellable_nosleep( + this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); + if (cancelled) return true; } else { - kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, - new_state); flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); } ANNOTATE_BARRIER_END(other_threads[i]); @@ -203,20 +203,22 @@ static bool __kmp_linear_barrier_release_template( other_threads[i]->th.th_bar[bt].bb.b_go, other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP)); 
ANNOTATE_BARRIER_BEGIN(other_threads[i]); - kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_go, - other_threads[i]); + kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, + other_threads[i]); flag.release(); } } } else { // Wait for the MASTER thread to release us KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n", gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); + kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); if (cancellable) { - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - if (flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj))) + bool cancelled = flag.wait_cancellable_nosleep( + this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); + if (cancelled) { return true; + } } else { - kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); } ANNOTATE_BARRIER_END(this_thr); @@ -337,7 +339,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); // Wait for child to arrive - kmp_flag_64<> flag(&child_bar->b_arrived, new_state); + kmp_flag_64 flag(&child_bar->b_arrived, new_state); flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); ANNOTATE_BARRIER_END(child_thr); #if USE_ITT_BUILD && USE_ITT_NOTIFY @@ -382,7 +384,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, is valid any more - it could be deallocated by the master thread at any time. 
*/ ANNOTATE_BARRIER_BEGIN(this_thr); - kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[parent_tid]); + kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]); flag.release(); } else { // Need to update the team arrived pointer if we are the master thread @@ -418,7 +420,7 @@ static void __kmp_tree_barrier_release( KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); // Wait for parent thread to release us - kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); + kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); ANNOTATE_BARRIER_END(this_thr); #if USE_ITT_BUILD && USE_ITT_NOTIFY @@ -496,7 +498,7 @@ static void __kmp_tree_barrier_release( child_bar->b_go + KMP_BARRIER_STATE_BUMP)); // Release child from barrier ANNOTATE_BARRIER_BEGIN(child_thr); - kmp_flag_64<> flag(&child_bar->b_go, child_thr); + kmp_flag_64 flag(&child_bar->b_go, child_thr); flag.release(); child++; child_tid++; @@ -538,7 +540,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, #endif /* Perform a hypercube-embedded tree gather to wait until all of the threads have arrived, and reduce any required data as we go. 
*/ - kmp_flag_64<> p_flag(&thr_bar->b_arrived); + kmp_flag_64 p_flag(&thr_bar->b_arrived); for (level = 0, offset = 1; offset < num_threads; level += branch_bits, offset <<= branch_bits) { kmp_uint32 child; @@ -586,7 +588,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); // Wait for child to arrive - kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state); + kmp_flag_64 c_flag(&child_bar->b_arrived, new_state); c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); ANNOTATE_BARRIER_END(child_thr); KMP_MB(); // Synchronize parent and child threads. @@ -668,7 +670,7 @@ static void __kmp_hyper_barrier_release( KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); // Wait for parent thread to release us - kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); + kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); ANNOTATE_BARRIER_END(this_thr); #if USE_ITT_BUILD && USE_ITT_NOTIFY @@ -770,7 +772,7 @@ static void __kmp_hyper_barrier_release( child_bar->b_go + KMP_BARRIER_STATE_BUMP)); // Release child from barrier ANNOTATE_BARRIER_BEGIN(child_thr); - kmp_flag_64<> flag(&child_bar->b_go, child_thr); + kmp_flag_64 flag(&child_bar->b_go, child_thr); flag.release(); } } @@ -915,7 +917,7 @@ static void __kmp_hierarchical_barrier_gather( KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting " "for leaf kids\n", gtid, team->t.t_id, tid)); - kmp_flag_64<> flag(&thr_bar->b_arrived, leaf_state); + kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state); flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); if (reduce) { ANNOTATE_REDUCE_AFTER(reduce); @@ -955,7 +957,7 @@ static void __kmp_hierarchical_barrier_gather( gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, 
team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); - kmp_flag_64<> flag(&child_bar->b_arrived, new_state); + kmp_flag_64 flag(&child_bar->b_arrived, new_state); flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); ANNOTATE_BARRIER_END(child_thr); if (reduce) { @@ -988,7 +990,7 @@ static void __kmp_hierarchical_barrier_gather( gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); - kmp_flag_64<> flag(&child_bar->b_arrived, new_state); + kmp_flag_64 flag(&child_bar->b_arrived, new_state); flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); ANNOTATE_BARRIER_END(child_thr); if (reduce) { @@ -1023,8 +1025,7 @@ static void __kmp_hierarchical_barrier_gather( !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived // flag; release it ANNOTATE_BARRIER_BEGIN(this_thr); - kmp_flag_64<> flag(&thr_bar->b_arrived, - other_threads[thr_bar->parent_tid]); + kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]); flag.release(); } else { // Leaf does special release on "offset" bits of parent's b_arrived flag @@ -1068,7 +1069,7 @@ static void __kmp_hierarchical_barrier_release( thr_bar->team == NULL) { // Use traditional method of waiting on my own b_go flag thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG; - kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); + kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); ANNOTATE_BARRIER_END(this_thr); TCW_8(thr_bar->b_go, @@ -1217,7 +1218,7 @@ static void __kmp_hierarchical_barrier_release( child_bar->b_go + KMP_BARRIER_STATE_BUMP)); // Release child using child's b_go flag ANNOTATE_BARRIER_BEGIN(child_thr); - kmp_flag_64<> flag(&child_bar->b_go, child_thr); + kmp_flag_64 flag(&child_bar->b_go, child_thr); flag.release(); } } else { // Release all children at once with leaf_state bits on my own @@ -1243,7 +1244,7 @@ static void 
__kmp_hierarchical_barrier_release( child_bar->b_go + KMP_BARRIER_STATE_BUMP)); // Release child using child's b_go flag ANNOTATE_BARRIER_BEGIN(child_thr); - kmp_flag_64<> flag(&child_bar->b_go, child_thr); + kmp_flag_64 flag(&child_bar->b_go, child_thr); flag.release(); } } diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp index 6c94196..6e636dc 100644 --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -206,13 +206,6 @@ int __kmp_display_env = FALSE; int __kmp_display_env_verbose = FALSE; int __kmp_omp_cancellation = FALSE; -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT -int __kmp_user_level_mwait = FALSE; -int __kmp_umwait_enabled = FALSE; -int __kmp_mwait_enabled = FALSE; -int __kmp_mwait_hints = 0; -#endif - /* map OMP 3.0 schedule types with our internal schedule types */ enum sched_type __kmp_sch_map[kmp_sched_upper - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2] = { diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index 6accf5c..42513f1 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -281,16 +281,6 @@ template <> struct traits_t { #define __forceinline __inline #endif -/* Check if the OS/arch can support user-level mwait */ -// All mwait code tests for UMWAIT first, so it should only fall back to ring3 -// MWAIT for KNL. 
-#define KMP_HAVE_MWAIT \ - ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) && \ - !KMP_MIC2) -#define KMP_HAVE_UMWAIT \ - ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) && \ - !KMP_MIC) - #if KMP_OS_WINDOWS #include diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index e074231..c4fb008 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -5458,7 +5458,7 @@ void __kmp_free_team(kmp_root_t *root, } #endif // first check if thread is sleeping - kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th); + kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th); if (fl.is_sleeping()) fl.resume(__kmp_gtid_from_thread(th)); KMP_CPU_PAUSE(); @@ -5885,7 +5885,7 @@ static void __kmp_reap_thread(kmp_info_t *thread, int is_root) { /* Need release fence here to prevent seg faults for tree forkjoin barrier * (GEH) */ ANNOTATE_HAPPENS_BEFORE(thread); - kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread); + kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread); __kmp_release_64(&flag); } @@ -6579,48 +6579,6 @@ static void __kmp_check_mic_type() { #endif /* KMP_MIC_SUPPORTED */ -#if KMP_HAVE_UMWAIT -static void __kmp_user_level_mwait_init() { - struct kmp_cpuid buf; - __kmp_x86_cpuid(7, 0, &buf); - __kmp_umwait_enabled = ((buf.ecx >> 5) & 1) && __kmp_user_level_mwait; - KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n", - __kmp_umwait_enabled)); -} -#elif KMP_HAVE_MWAIT -#ifndef AT_INTELPHIUSERMWAIT -// Spurious, non-existent value that should always fail to return anything. -// Will be replaced with the correct value when we know that. -#define AT_INTELPHIUSERMWAIT 10000 -#endif -// getauxval() function is available in RHEL7 and SLES12. If a system with an -// earlier OS is used to build the RTL, we'll use the following internal -// function when the entry is not found. 
-unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL; -unsigned long getauxval(unsigned long) { return 0; } - -static void __kmp_user_level_mwait_init() { - // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available - // use them to find if the user-level mwait is enabled. Otherwise, forcibly - // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable - // KMP_USER_LEVEL_MWAIT was set to TRUE. - if (__kmp_mic_type == mic3) { - unsigned long res = getauxval(AT_INTELPHIUSERMWAIT); - if ((res & 0x1) || __kmp_user_level_mwait) { - __kmp_mwait_enabled = TRUE; - if (__kmp_user_level_mwait) { - KMP_INFORM(EnvMwaitWarn); - } - } else { - __kmp_mwait_enabled = FALSE; - } - } - KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, " - "__kmp_mwait_enabled = %d\n", - __kmp_mic_type, __kmp_mwait_enabled)); -} -#endif /* KMP_HAVE_UMWAIT */ - static void __kmp_do_serial_initialize(void) { int i, gtid; int size; @@ -6795,9 +6753,6 @@ static void __kmp_do_serial_initialize(void) { __kmp_env_initialize(NULL); -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT - __kmp_user_level_mwait_init(); -#endif // Print all messages in message catalog for testing purposes. 
#ifdef KMP_DEBUG char const *val = __kmp_env_get("KMP_DUMP_CATALOG"); @@ -8398,8 +8353,7 @@ void __kmp_resume_if_soft_paused() { for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) { kmp_info_t *thread = __kmp_threads[gtid]; if (thread) { // Wake it if sleeping - kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, - thread); + kmp_flag_64 fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread); if (fl.is_sleeping()) fl.resume(gtid); else if (__kmp_try_suspend_mx(thread)) { // got suspend lock diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index e3a2afc..5745cbb 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -4621,35 +4621,6 @@ static void __kmp_stg_print_task_throttling(kmp_str_buf_t *buffer, __kmp_stg_print_bool(buffer, name, __kmp_enable_task_throttling); } // __kmp_stg_print_task_throttling -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT -// ----------------------------------------------------------------------------- -// KMP_USER_LEVEL_MWAIT - -static void __kmp_stg_parse_user_level_mwait(char const *name, - char const *value, void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_user_level_mwait); -} // __kmp_stg_parse_user_level_mwait - -static void __kmp_stg_print_user_level_mwait(kmp_str_buf_t *buffer, - char const *name, void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_user_level_mwait); -} // __kmp_stg_print_user_level_mwait - -// ----------------------------------------------------------------------------- -// KMP_MWAIT_HINTS - -static void __kmp_stg_parse_mwait_hints(char const *name, char const *value, - void *data) { - __kmp_stg_parse_int(name, value, 0, INT_MAX, &__kmp_mwait_hints); -} // __kmp_stg_parse_mwait_hints - -static void __kmp_stg_print_mwait_hints(kmp_str_buf_t *buffer, char const *name, - void *data) { - __kmp_stg_print_int(buffer, name, __kmp_mwait_hints); -} // __kmp_stg_print_mwait_hints - -#endif // KMP_HAVE_MWAIT || 
KMP_HAVE_UMWAIT - // ----------------------------------------------------------------------------- // OMP_DISPLAY_ENV @@ -4968,12 +4939,6 @@ static kmp_setting_t __kmp_stg_table[] = { __kmp_stg_print_omp_tool_libraries, NULL, 0, 0}, #endif -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT - {"KMP_USER_LEVEL_MWAIT", __kmp_stg_parse_user_level_mwait, - __kmp_stg_print_user_level_mwait, NULL, 0, 0}, - {"KMP_MWAIT_HINTS", __kmp_stg_parse_mwait_hints, - __kmp_stg_print_mwait_hints, NULL, 0, 0}, -#endif {"", NULL, NULL, NULL, 0, 0}}; // settings static int const __kmp_stg_count = diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h index 4aed98a..c281f64 100644 --- a/openmp/runtime/src/kmp_stats.h +++ b/openmp/runtime/src/kmp_stats.h @@ -258,7 +258,6 @@ enum stats_state_e { macro(KMP_tree_release, 0, arg) \ macro(USER_resume, 0, arg) \ macro(USER_suspend, 0, arg) \ - macro(USER_mwait, 0, arg) \ macro(KMP_allocate_team, 0, arg) \ macro(KMP_setup_icv_copy, 0, arg) \ macro(USER_icv_copy, 0, arg) \ diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp index 3a1f5ab..77148d5 100644 --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -786,8 +786,7 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, } int thread_finished = FALSE; - kmp_flag_32 flag( - (std::atomic *)&node.dn.npredecessors, 0U); + kmp_flag_32 flag((std::atomic *)&node.dn.npredecessors, 0U); while (node.dn.npredecessors > 0) { flag.execute_tasks(thread, gtid, FALSE, &thread_finished USE_ITT_BUILD_ARG(NULL), diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index b564313..3dfc3c4 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1876,10 +1876,9 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, must_wait = must_wait || (thread->th.th_task_team != NULL && 
thread->th.th_task_team->tt.tt_found_proxy_tasks); if (must_wait) { - kmp_flag_32 flag( - RCAST(std::atomic *, - &(taskdata->td_incomplete_child_tasks)), - 0U); + kmp_flag_32 flag(RCAST(std::atomic *, + &(taskdata->td_incomplete_child_tasks)), + 0U); while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) { flag.execute_tasks(thread, gtid, FALSE, &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), @@ -1985,7 +1984,7 @@ kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) { thread->th.ompt_thread_info.ompt_task_yielded = 1; #endif __kmp_execute_tasks_32( - thread, gtid, (kmp_flag_32<> *)NULL, FALSE, + thread, gtid, NULL, FALSE, &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint); #if OMPT_SUPPORT @@ -2513,8 +2512,8 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks)) { - kmp_flag_32 flag( - RCAST(std::atomic *, &(taskgroup->count)), 0U); + kmp_flag_32 flag(RCAST(std::atomic *, &(taskgroup->count)), + 0U); while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) { flag.execute_tasks(thread, gtid, FALSE, &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), @@ -3022,9 +3021,8 @@ static inline int __kmp_execute_tasks_template( } } -template int __kmp_execute_tasks_32( - kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin, + kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained) { return __kmp_execute_tasks_template( @@ -3032,9 +3030,8 @@ int __kmp_execute_tasks_32( thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); } -template int __kmp_execute_tasks_64( - kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin, + kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), 
kmp_int32 is_constrained) { return __kmp_execute_tasks_template( @@ -3051,23 +3048,6 @@ int __kmp_execute_tasks_oncore( thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); } -template int -__kmp_execute_tasks_32(kmp_info_t *, kmp_int32, - kmp_flag_32 *, int, - int *USE_ITT_BUILD_ARG(void *), kmp_int32); - -template int __kmp_execute_tasks_64(kmp_info_t *, kmp_int32, - kmp_flag_64 *, - int, - int *USE_ITT_BUILD_ARG(void *), - kmp_int32); - -template int __kmp_execute_tasks_64(kmp_info_t *, kmp_int32, - kmp_flag_64 *, - int, - int *USE_ITT_BUILD_ARG(void *), - kmp_int32); - // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the // next barrier so they can assist in executing enqueued tasks. // First thread in allocates the task team atomically. @@ -3617,10 +3597,9 @@ void __kmp_task_team_wait( // Worker threads may have dropped through to release phase, but could // still be executing tasks. Wait here for tasks to complete. To avoid // memory contention, only master thread checks termination condition. - kmp_flag_32 flag( - RCAST(std::atomic *, - &task_team->tt.tt_unfinished_threads), - 0U); + kmp_flag_32 flag(RCAST(std::atomic *, + &task_team->tt.tt_unfinished_threads), + 0U); flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); } // Deactivate the old task team, so that the worker threads will stop @@ -3642,7 +3621,7 @@ void __kmp_task_team_wait( } // __kmp_tasking_barrier: -// This routine is called only when __kmp_tasking_mode == tskm_extra_barrier. +// This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier. // Internal function to execute all tasks prior to a regular barrier or a join // barrier. It is a full barrier itself, which unfortunately turns regular // barriers into double barriers and join barriers into 1 1/2 barriers. 
@@ -3656,7 +3635,7 @@ void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) { #if USE_ITT_BUILD KMP_FSYNC_SPIN_INIT(spin, NULL); #endif /* USE_ITT_BUILD */ - kmp_flag_32 spin_flag(spin, 0U); + kmp_flag_32 spin_flag(spin, 0U); while (!spin_flag.execute_tasks(thread, gtid, TRUE, &flag USE_ITT_BUILD_ARG(NULL), 0)) { #if USE_ITT_BUILD diff --git a/openmp/runtime/src/kmp_wait_release.cpp b/openmp/runtime/src/kmp_wait_release.cpp index cabb572..7d12c74 100644 --- a/openmp/runtime/src/kmp_wait_release.cpp +++ b/openmp/runtime/src/kmp_wait_release.cpp @@ -12,32 +12,14 @@ #include "kmp_wait_release.h" -void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag, +void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { if (final_spin) - __kmp_wait_template, TRUE>( + __kmp_wait_template( this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj)); else - __kmp_wait_template, FALSE>( + __kmp_wait_template( this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj)); } -void __kmp_release_64(kmp_flag_64<> *flag) { __kmp_release_template(flag); } - -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT -template -void __kmp_mwait_32(int th_gtid, kmp_flag_32 *flag) { - __kmp_mwait_template(th_gtid, flag); -} -template -void __kmp_mwait_64(int th_gtid, kmp_flag_64 *flag) { - __kmp_mwait_template(th_gtid, flag); -} -void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) { - __kmp_mwait_template(th_gtid, flag); -} - -template void __kmp_mwait_32(int, kmp_flag_32 *); -template void __kmp_mwait_64(int, kmp_flag_64 *); -template void __kmp_mwait_64(int, kmp_flag_64 *); -#endif +void __kmp_release_64(kmp_flag_64 *flag) { __kmp_release_template(flag); } diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h index e28b25f..b235be3 100644 --- a/openmp/runtime/src/kmp_wait_release.h +++ b/openmp/runtime/src/kmp_wait_release.h @@ -42,26 +42,20 @@ enum flag_type { flag_oncore /**< special 64-bit flag for 
on-core barrier (hierarchical) */ }; -struct flag_properties { - unsigned int type : 16; - unsigned int reserved : 16; -}; - /*! * Base class for wait/release volatile flag */ template class kmp_flag_native { volatile P *loc; - flag_properties t; + flag_type t; public: typedef P flag_t; - kmp_flag_native(volatile P *p, flag_type ft) - : loc(p), t({(unsigned int)ft, 0U}) {} + kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {} volatile P *get() { return loc; } void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); } void set(volatile P *new_loc) { loc = new_loc; } - flag_type get_type() { return (flag_type)(t.type); } + flag_type get_type() { return t; } P load() { return *loc; } void store(P val) { *loc = val; } }; @@ -73,12 +67,10 @@ template class kmp_flag { std::atomic

*loc; /**< Pointer to the flag storage that is modified by another thread */ - flag_properties t; /**< "Type" of the flag in loc */ - + flag_type t; /**< "Type" of the flag in loc */ public: typedef P flag_t; - kmp_flag(std::atomic

*p, flag_type ft) - : loc(p), t({(unsigned int)ft, 0U}) {} + kmp_flag(std::atomic

*p, flag_type ft) : loc(p), t(ft) {} /*! * @result the pointer to the actual flag */ @@ -94,7 +86,7 @@ public: /*! * @result the flag_type */ - flag_type get_type() { return (flag_type)(t.type); } + flag_type get_type() { return t; } /*! * @result flag value */ @@ -112,7 +104,6 @@ public: bool notdone_check(); P internal_release(); void suspend(int th_gtid); - void mwait(int th_gtid); void resume(int th_gtid); P set_sleeping(); P unset_sleeping(); @@ -169,8 +160,8 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr, to wake it back up to prevent deadlocks! NOTE: We may not belong to a team at this point. */ -template +template static inline bool __kmp_wait_template(kmp_info_t *this_thr, C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) { @@ -194,7 +185,7 @@ __kmp_wait_template(kmp_info_t *this_thr, return false; } th_gtid = this_thr->th.th_info.ds.ds_gtid; - if (Cancellable) { + if (cancellable) { kmp_team_t *team = this_thr->th.th_team; if (team && team->t.t_cancel_request == cancel_parallel) return true; @@ -384,7 +375,7 @@ final_spin=FALSE) } #endif // Check if the barrier surrounding this wait loop has been cancelled - if (Cancellable) { + if (cancellable) { kmp_team_t *team = this_thr->th.th_team; if (team && team->t.t_cancel_request == cancel_parallel) break; @@ -409,31 +400,23 @@ final_spin=FALSE) #endif // Don't suspend if wait loop designated non-sleepable // in template parameters - if (!Sleepable) + if (!sleepable) continue; if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && __kmp_pause_status != kmp_soft_paused) continue; -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT - if (__kmp_mwait_enabled || __kmp_umwait_enabled) { - KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid)); - flag->mwait(th_gtid); - } else { -#endif - KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid)); + KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid)); + #if KMP_OS_UNIX - if (final_spin) - 
KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false); + if (final_spin) + KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false); #endif - flag->suspend(th_gtid); + flag->suspend(th_gtid); #if KMP_OS_UNIX - if (final_spin) - KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true); -#endif -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT - } + if (final_spin) + KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true); #endif if (TCR_4(__kmp_global.g.g_done)) { @@ -475,7 +458,7 @@ final_spin=FALSE) KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false); #endif KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin)); - if (Cancellable) { + if (cancellable) { kmp_team_t *team = this_thr->th.th_team; if (team && team->t.t_cancel_request == cancel_parallel) { if (tasks_completed) { @@ -492,83 +475,6 @@ final_spin=FALSE) return false; } -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT -// Set up a monitor on the flag variable causing the calling thread to wait in -// a less active state until the flag variable is modified. -template -static inline void __kmp_mwait_template(int th_gtid, C *flag) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait); - kmp_info_t *th = __kmp_threads[th_gtid]; - - KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid, - flag->get())); - - // User-level mwait is available - KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled); - - __kmp_suspend_initialize_thread(th); - __kmp_lock_suspend_mx(th); - - volatile void *spin = flag->get(); - void *cacheline = (void *)(kmp_uint64(spin) & ~(CACHE_LINE - 1)); - - if (!flag->done_check()) { - // Mark thread as no longer active - th->th.th_active = FALSE; - if (th->th.th_active_in_pool) { - th->th.th_active_in_pool = FALSE; - KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth); - KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); - } - flag->set_sleeping(); - KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid)); -#if KMP_HAVE_UMWAIT - if (__kmp_umwait_enabled) { - __kmp_umonitor(cacheline); - } -#elif 
KMP_HAVE_MWAIT - if (__kmp_mwait_enabled) { - __kmp_mm_monitor(cacheline, 0, 0); - } -#endif - // To avoid a race, check flag between 'monitor' and 'mwait'. A write to - // the address could happen after the last time we checked and before - // monitoring started, in which case monitor can't detect the change. - if (flag->done_check()) - flag->unset_sleeping(); - else { - // if flag changes here, wake-up happens immediately - TCW_PTR(th->th.th_sleep_loc, (void *)flag); - __kmp_unlock_suspend_mx(th); - KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid)); -#if KMP_HAVE_UMWAIT - if (__kmp_umwait_enabled) { - __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter - } -#elif KMP_HAVE_MWAIT - if (__kmp_mwait_enabled) { - __kmp_mm_mwait(0, __kmp_mwait_hints); - } -#endif - KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid)); - __kmp_lock_suspend_mx(th); - // Clean up sleep info; doesn't matter how/why this thread stopped waiting - if (flag->is_sleeping()) - flag->unset_sleeping(); - TCW_PTR(th->th.th_sleep_loc, NULL); - } - // Mark thread as active again - th->th.th_active = TRUE; - if (TCR_4(th->th.th_in_pool)) { - KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth); - th->th.th_active_in_pool = TRUE; - } - } // Drop out to main wait loop to check flag, handle tasks, etc. - __kmp_unlock_suspend_mx(th); - KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid)); -} -#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT - /* Release any threads specified as waiting on the flag by releasing the flag and resume the waiting thread if indicated by the sleep bit(s). 
A thread that calls __kmp_wait_template must call this function to wake up the potentially @@ -639,7 +545,7 @@ template <> struct flag_traits { }; // Basic flag that does not use C11 Atomics -template +template class kmp_basic_flag_native : public kmp_flag_native { typedef flag_traits traits_type; FlagType checker; /**< Value to compare flag to to check if flag has been @@ -682,13 +588,7 @@ public: /*! * @result true if the flag object has been released. */ - bool done_check() { - if (Sleepable) - return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) == - checker; - else - return traits_type::tcr(*(this->get())) == checker; - } + bool done_check() { return traits_type::tcr(*(this->get())) == checker; } /*! * @param old_loc in old value of flag * @result true if the flag's old value indicates it was released. @@ -743,8 +643,7 @@ public: enum barrier_type get_bt() { return bs_last_barrier; } }; -template -class kmp_basic_flag : public kmp_flag { +template class kmp_basic_flag : public kmp_flag { typedef flag_traits traits_type; FlagType checker; /**< Value to compare flag to to check if flag has been released. */ @@ -786,12 +685,7 @@ public: /*! * @result true if the flag object has been released. */ - bool done_check() { - if (Sleepable) - return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker; - else - return this->load() == checker; - } + bool done_check() { return this->load() == checker; } /*! * @param old_loc in old value of flag * @result true if the flag's old value indicates it was released. 
@@ -842,19 +736,14 @@ public: enum barrier_type get_bt() { return bs_last_barrier; } }; -template -class kmp_flag_32 : public kmp_basic_flag { +class kmp_flag_32 : public kmp_basic_flag { public: - kmp_flag_32(std::atomic *p) - : kmp_basic_flag(p) {} + kmp_flag_32(std::atomic *p) : kmp_basic_flag(p) {} kmp_flag_32(std::atomic *p, kmp_info_t *thr) - : kmp_basic_flag(p, thr) {} + : kmp_basic_flag(p, thr) {} kmp_flag_32(std::atomic *p, kmp_uint32 c) - : kmp_basic_flag(p, c) {} + : kmp_basic_flag(p, c) {} void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); } -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT - void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); } -#endif void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); } int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), @@ -863,32 +752,27 @@ public: this_thr, gtid, this, final_spin, thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); } - bool wait(kmp_info_t *this_thr, + void wait(kmp_info_t *this_thr, int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { if (final_spin) - return __kmp_wait_template( + __kmp_wait_template( this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); else - return __kmp_wait_template( + __kmp_wait_template( this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); } void release() { __kmp_release_template(this); } flag_type get_ptr_type() { return flag32; } }; -template -class kmp_flag_64 : public kmp_basic_flag_native { +class kmp_flag_64 : public kmp_basic_flag_native { public: - kmp_flag_64(volatile kmp_uint64 *p) - : kmp_basic_flag_native(p) {} + kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native(p) {} kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) - : kmp_basic_flag_native(p, thr) {} + : kmp_basic_flag_native(p, thr) {} kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) - : kmp_basic_flag_native(p, c) {} + : kmp_basic_flag_native(p, c) {} void suspend(int th_gtid) { 
__kmp_suspend_64(th_gtid, this); } -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT - void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); } -#endif void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); } int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), @@ -897,15 +781,27 @@ public: this_thr, gtid, this, final_spin, thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); } - bool wait(kmp_info_t *this_thr, + void wait(kmp_info_t *this_thr, int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { if (final_spin) - return __kmp_wait_template( + __kmp_wait_template( this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); else - return __kmp_wait_template( + __kmp_wait_template( this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); } + bool wait_cancellable_nosleep(kmp_info_t *this_thr, + int final_spin + USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + bool retval = false; + if (final_spin) + retval = __kmp_wait_template( + this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); + else + retval = __kmp_wait_template( + this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); + return retval; + } void release() { __kmp_release_template(this); } flag_type get_ptr_type() { return flag64; } }; @@ -963,8 +859,8 @@ public: return true; else if (flag_switch) { this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING; - kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go, - (kmp_uint64)KMP_BARRIER_STATE_BUMP); + kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, + (kmp_uint64)KMP_BARRIER_STATE_BUMP); __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); } return false; @@ -1000,9 +896,6 @@ public: } void release() { __kmp_release_template(this); } void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); } -#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT - void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); } -#endif void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); } int 
execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), @@ -1022,15 +915,15 @@ static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) { if (!flag) return; - switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) { + switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) { case flag32: - __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL); + __kmp_resume_32(gtid, NULL); break; case flag64: - __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL); + __kmp_resume_64(gtid, NULL); break; case flag_oncore: - __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL); + __kmp_resume_oncore(gtid, NULL); break; } } diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index 3706585..58cc4d2 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -1459,7 +1459,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { __kmp_suspend_initialize_thread(th); - __kmp_lock_suspend_mx(th); + status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_lock", status); KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n", th_gtid, flag->get())); @@ -1470,7 +1471,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && __kmp_pause_status != kmp_soft_paused) { flag->unset_sleeping(); - __kmp_unlock_suspend_mx(th); + status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); return; } KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x," @@ -1533,7 +1535,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { th_gtid)); status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond, &th->th.th_suspend_mx.m_mutex); -#endif // USE_SUSPEND_TIMEOUT +#endif if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) 
{ KMP_SYSFAIL("pthread_cond_wait", status); @@ -1573,26 +1575,21 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { } #endif - __kmp_unlock_suspend_mx(th); + status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid)); } -template -void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) { +void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) { __kmp_suspend_template(th_gtid, flag); } -template -void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) { +void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) { __kmp_suspend_template(th_gtid, flag); } void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { __kmp_suspend_template(th_gtid, flag); } -template void __kmp_suspend_32(int, kmp_flag_32 *); -template void __kmp_suspend_64(int, kmp_flag_64 *); -template void __kmp_suspend_64(int, kmp_flag_64 *); - /* This routine signals the thread specified by target_gtid to wake up after setting the sleep bit indicated by the flag argument to FALSE. The target thread must already have called __kmp_suspend_template() */ @@ -1611,7 +1608,9 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) { KMP_DEBUG_ASSERT(gtid != target_gtid); __kmp_suspend_initialize_thread(th); - __kmp_lock_suspend_mx(th); + + status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_lock", status); if (!flag) { // coming from __kmp_null_resume_wrapper flag = (C *)CCAST(void *, th->th.th_sleep_loc); @@ -1620,11 +1619,13 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) { // First, check if the flag is null or its type has changed. If so, someone // else woke it up. 
if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type - // simply shows what flag was cast to + // simply shows what + // flag was cast to KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " "awake: flag(%p)\n", gtid, target_gtid, NULL)); - __kmp_unlock_suspend_mx(th); + status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); return; } else { // if multiple threads are sleeping, flag should be internally // referring to a specific thread here @@ -1634,7 +1635,8 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) { "awake: flag(%p): " "%u => %u\n", gtid, target_gtid, flag->get(), old_spin, flag->load())); - __kmp_unlock_suspend_mx(th); + status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); return; } KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset " @@ -1654,27 +1656,23 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) { #endif status = pthread_cond_signal(&th->th.th_suspend_cv.c_cond); KMP_CHECK_SYSFAIL("pthread_cond_signal", status); - __kmp_unlock_suspend_mx(th); + status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up" " for T#%d\n", gtid, target_gtid)); } -template -void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) { +void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) { __kmp_resume_template(target_gtid, flag); } -template -void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { +void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { __kmp_resume_template(target_gtid, flag); } void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { __kmp_resume_template(target_gtid, flag); } -template void __kmp_resume_32(int, kmp_flag_32 *); -template void __kmp_resume_64(int, kmp_flag_64 *); - #if 
KMP_USE_MONITOR void __kmp_resume_monitor() { KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume); diff --git a/openmp/runtime/src/z_Windows_NT_util.cpp b/openmp/runtime/src/z_Windows_NT_util.cpp index af231e2..f463ef6 100644 --- a/openmp/runtime/src/z_Windows_NT_util.cpp +++ b/openmp/runtime/src/z_Windows_NT_util.cpp @@ -363,7 +363,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { th_gtid, flag->get())); __kmp_suspend_initialize_thread(th); - __kmp_lock_suspend_mx(th); + __kmp_win32_mutex_lock(&th->th.th_suspend_mx); KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for flag's" " loc(%p)\n", @@ -375,7 +375,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && __kmp_pause_status != kmp_soft_paused) { flag->unset_sleeping(); - __kmp_unlock_suspend_mx(th); + __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); return; } @@ -437,26 +437,21 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { } } - __kmp_unlock_suspend_mx(th); + __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); + KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid)); } -template -void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) { +void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) { __kmp_suspend_template(th_gtid, flag); } -template -void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) { +void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) { __kmp_suspend_template(th_gtid, flag); } void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { __kmp_suspend_template(th_gtid, flag); } -template void __kmp_suspend_32(int, kmp_flag_32 *); -template void __kmp_suspend_64(int, kmp_flag_64 *); -template void __kmp_suspend_64(int, kmp_flag_64 *); - /* This routine signals the thread specified by target_gtid to wake up after setting the sleep bit indicated by the flag argument to FALSE */ template @@ -472,7 +467,7 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) 
{ gtid, target_gtid)); __kmp_suspend_initialize_thread(th); - __kmp_lock_suspend_mx(th); + __kmp_win32_mutex_lock(&th->th.th_suspend_mx); if (!flag) { // coming from __kmp_null_resume_wrapper flag = (C *)th->th.th_sleep_loc; @@ -486,7 +481,7 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) { KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " "awake: flag's loc(%p)\n", gtid, target_gtid, NULL)); - __kmp_unlock_suspend_mx(th); + __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); return; } else { typename C::flag_t old_spin = flag->unset_sleeping(); @@ -494,7 +489,7 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) { KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " "awake: flag's loc(%p): %u => %u\n", gtid, target_gtid, flag->get(), old_spin, *(flag->get()))); - __kmp_unlock_suspend_mx(th); + __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); return; } } @@ -504,28 +499,23 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) { gtid, target_gtid, flag->get())); __kmp_win32_cond_signal(&th->th.th_suspend_cv); - __kmp_unlock_suspend_mx(th); + __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up" " for T#%d\n", gtid, target_gtid)); } -template -void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) { +void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) { __kmp_resume_template(target_gtid, flag); } -template -void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { +void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { __kmp_resume_template(target_gtid, flag); } void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { __kmp_resume_template(target_gtid, flag); } -template void __kmp_resume_32(int, kmp_flag_32 *); -template void __kmp_resume_64(int, kmp_flag_64 *); - void __kmp_yield() { Sleep(0); } void __kmp_gtid_set_specific(int gtid) { -- 2.7.4