#
# Regular entry points
- __kmp_wait_yield_4
+ __kmp_wait_4
__kmp_fork_call
__kmp_invoke_microtask
%ifdef KMP_USE_MONITOR
__kmp_reap_worker;
__kmp_release_64;
__kmp_wait_64;
- __kmp_wait_yield_4;
+ __kmp_wait_4;
# ittnotify symbols to be used by debugger
__kmp_itt_fini_ittlib;
(KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
#define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
#endif
-#define KMP_YIELD_NOW() \
- (KMP_NOW_MSEC() / KMP_MAX(__kmp_dflt_blocktime, 1) % \
- (__kmp_yield_on_count + __kmp_yield_off_count) < \
- (kmp_uint32)__kmp_yield_on_count)
#endif // KMP_USE_MONITOR
#define KMP_MIN_STATSCOLS 40
#define KMP_MAX_CHUNK (INT_MAX - 1)
#define KMP_DEFAULT_CHUNK 1
-#define KMP_MIN_INIT_WAIT 1
-#define KMP_MAX_INIT_WAIT (INT_MAX / 2)
-#define KMP_DEFAULT_INIT_WAIT 2048U
-
-#define KMP_MIN_NEXT_WAIT 1
-#define KMP_MAX_NEXT_WAIT (INT_MAX / 2)
-#define KMP_DEFAULT_NEXT_WAIT 1024U
-
#define KMP_DFLT_DISP_NUM_BUFF 7
#define KMP_MAX_ORDERED 8
extern void __kmp_x86_pause(void);
#elif KMP_MIC
// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
-// regression after removal of extra PAUSE from KMP_YIELD_SPIN(). Changing
+// regression after removal of extra PAUSE from spin loops. Changing
// the delay from 100 to 300 showed even better performance than double PAUSE
// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
#define KMP_INIT_YIELD(count) \
{ (count) = __kmp_yield_init; }
+#define KMP_OVERSUBSCRIBED \
+ (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
+
+#define KMP_TRY_YIELD \
+ ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
+
+#define KMP_TRY_YIELD_OVERSUB \
+ ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
+
#define KMP_YIELD(cond) \
{ \
KMP_CPU_PAUSE(); \
- __kmp_yield((cond)); \
+ if ((cond) && (KMP_TRY_YIELD)) \
+ __kmp_yield(); \
+ }
+
+#define KMP_YIELD_OVERSUB() \
+ { \
+ KMP_CPU_PAUSE(); \
+ if ((KMP_TRY_YIELD_OVERSUB)) \
+ __kmp_yield(); \
}
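
For readers skimming the new macros: the gating reduces to two tiny predicates. The sketch below is illustrative only; plain parameters stand in for the __kmp_* globals and the TCR_4 relaxed load:

// Minimal model of KMP_OVERSUBSCRIBED and KMP_TRY_YIELD; not runtime code.
static inline int kmp_oversubscribed(int nth, int avail_proc, int xproc) {
  // More live threads than processors available to this process?
  return nth > (avail_proc ? avail_proc : xproc);
}
static inline int kmp_try_yield(int use_yield, int oversub) {
  // 0 = never yield, 1 = always yield, 2 = yield only when oversubscribed.
  return use_yield == 1 || (use_yield == 2 && oversub);
}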
// Note the decrement of 2 in the following macros: it preserves the parity of
// the spin count seeded by KMP_INIT_YIELD(). Whether a thread yields at all is
// now governed by __kmp_use_yield (see KMP_TRY_YIELD above), not KMP_LIBRARY.
-
-#define KMP_YIELD_WHEN(cond, count) \
+#define KMP_YIELD_SPIN(count) \
{ \
KMP_CPU_PAUSE(); \
- (count) -= 2; \
- if (!(count)) { \
- __kmp_yield(cond); \
- (count) = __kmp_yield_next; \
+ if (KMP_TRY_YIELD) { \
+ (count) -= 2; \
+ if (!(count)) { \
+ __kmp_yield(); \
+ (count) = __kmp_yield_next; \
+ } \
} \
}
-#define KMP_YIELD_SPIN(count) \
+
+#define KMP_YIELD_OVERSUB_ELSE_SPIN(count) \
{ \
KMP_CPU_PAUSE(); \
- (count) -= 2; \
- if (!(count)) { \
- __kmp_yield(1); \
- (count) = __kmp_yield_next; \
+ if ((KMP_TRY_YIELD_OVERSUB)) \
+ __kmp_yield(); \
+ else if (__kmp_use_yield == 1) { \
+ (count) -= 2; \
+ if (!(count)) { \
+ __kmp_yield(); \
+ (count) = __kmp_yield_next; \
+ } \
} \
}
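
Every converted call site follows the same shape: seed a spin counter once with KMP_INIT_YIELD, then let KMP_YIELD_OVERSUB_ELSE_SPIN choose between an immediate yield (oversubscribed) and a counted pause on each iteration. A hedged sketch, assuming kmp.h is included; `flag` and `wait_for_flag` are invented for illustration:

static void wait_for_flag(volatile kmp_uint32 *flag) {
  kmp_uint32 spins;
  KMP_INIT_YIELD(spins); // seed the counter once, outside the loop
  while (TCR_4(*flag) == 0) {
    // Pause; yield immediately if oversubscribed, otherwise decrement the
    // counter and yield periodically when __kmp_use_yield == 1.
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
  }
}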
extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
-/* used for yielding spin-waits */
-extern unsigned int __kmp_init_wait; /* initial number of spin-tests */
-extern unsigned int __kmp_next_wait; /* susequent number of spin-tests */
-
extern enum library_type __kmp_library;
extern enum sched_type __kmp_sched; /* default runtime scheduling */
extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
#endif
+extern kmp_int32 __kmp_use_yield;
+extern kmp_int32 __kmp_use_yield_exp_set;
extern kmp_uint32 __kmp_yield_init;
extern kmp_uint32 __kmp_yield_next;
-#if KMP_USE_MONITOR
-extern kmp_uint32 __kmp_yielding_on;
-#endif
-extern kmp_uint32 __kmp_yield_cycle;
-extern kmp_int32 __kmp_yield_on_count;
-extern kmp_int32 __kmp_yield_off_count;
-
/* ------------------------------------------------------------------------- */
extern int __kmp_allThreadsSpecified;
int num_threads);
#endif
-extern void __kmp_yield(int cond);
+extern void __kmp_yield();
extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
enum sched_type schedule, kmp_int32 lb,
extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
-extern kmp_uint32 __kmp_wait_yield_4(kmp_uint32 volatile *spinner,
- kmp_uint32 checker,
- kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
- void *obj);
-extern void __kmp_wait_yield_4_ptr(void *spinner, kmp_uint32 checker,
- kmp_uint32 (*pred)(void *, kmp_uint32),
- void *obj);
+extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
+ kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
+ void *obj);
+extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
+ kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
class kmp_flag_32;
class kmp_flag_64;
// }
// and adding the yield here is good for at least a 10x speedup
// when running >2 threads per core (on the NAS LU benchmark).
- __kmp_yield(TRUE);
+ __kmp_yield();
#endif
#else
#error Unknown or unsupported architecture
kmp_uint32 spins; \
KMP_FSYNC_PREPARE(l); \
KMP_INIT_YIELD(spins); \
- if (TCR_4(__kmp_nth) > \
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
- KMP_YIELD(TRUE); \
- } else { \
- KMP_YIELD_SPIN(spins); \
- } \
kmp_backoff_t backoff = __kmp_spin_backoff_params; \
- while ( \
- KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
- !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
- __kmp_spin_backoff(&backoff); \
+ do { \
if (TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
} else { \
KMP_YIELD_SPIN(spins); \
} \
- } \
+ __kmp_spin_backoff(&backoff); \
+ } while ( \
+ KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
+ !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
} \
KMP_FSYNC_ACQUIRED(l); \
}
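
Note the control-flow change above: the old code yielded once before the while loop and duplicated the same yield block inside it; the new do/while pauses, yields, and backs off before each acquisition attempt, so the duplication disappears. Stripped of the FSYNC plumbing, the acquire loop is equivalent to this sketch (lock_is_busy and try_acquire are invented placeholders for the poll load and compare-exchange):

do {
  KMP_YIELD_OVERSUB_ELSE_SPIN(spins); // pause/yield between attempts
  __kmp_spin_backoff(&backoff);       // exponential backoff between attempts
} while (lock_is_busy() || !try_acquire());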
KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
} \
KMP_MB(); \
- KMP_YIELD(TCR_4(__kmp_nth) > \
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
+ KMP_YIELD_OVERSUB(); \
}
#endif // KMP_USE_FUTEX
// __kmp_dispatch_num_buffers)
if (idx != sh_buf->doacross_buf_idx) {
// Shared buffer is occupied, wait for it to be free
- __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
- __kmp_eq_4, NULL);
+ __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
+ __kmp_eq_4, NULL);
}
#if KMP_32_BIT_ARCH
// Check if we are the first thread. After the CAS the first thread gets 0,
KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "
"sh->buffer_index:%d\n",
gtid, my_buffer_index, sh->buffer_index));
- __kmp_wait_yield<kmp_uint32>(&sh->buffer_index, my_buffer_index,
- __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
- // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and
+ __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
+ __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
+ // Note: KMP_WAIT() cannot be used there: buffer index and
// my_buffer_index are *always* 32-bit integers.
KMP_MB(); /* is this necessary? */
KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
}
#endif
- __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
- __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
+ __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
+ __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
KMP_MB(); /* is this necessary? */
#ifdef KMP_DEBUG
{
}
#endif
- __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
- __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
+ __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
+ __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
KMP_MB(); /* is this necessary? */
KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "
}
kmp_uint32
-__kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
- kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
- void *obj // Higher-level synchronization object, or NULL.
- ) {
+__kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
+ kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
+ void *obj // Higher-level synchronization object, or NULL.
+ ) {
// note: we may not belong to a team at this point
volatile kmp_uint32 *spin = spinner;
kmp_uint32 check = checker;
split. It causes problems with infinite recursion because of exit lock */
/* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
__kmp_abort_thread(); */
-
- /* if we have waited a bit, or are oversubscribed, yield */
- /* pause is in the following code */
- KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
- KMP_YIELD_SPIN(spins);
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
}
KMP_FSYNC_SPIN_ACQUIRED(obj);
return r;
}
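
A usage sketch for the renamed primitive, spinning (never sleeping) until a shared 32-bit value reaches a target; `phase` and `wait_for_phase` are invented for illustration:

static volatile kmp_uint32 phase = 0; // advanced by another thread

static void wait_for_phase(kmp_uint32 target) {
  // Returns once __kmp_eq_4(phase, target) holds; the NULL obj means no
  // higher-level synchronization object is reported to ittnotify.
  __kmp_wait_4(&phase, target, __kmp_eq_4, NULL);
}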
-void __kmp_wait_yield_4_ptr(
- void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32),
- void *obj // Higher-level synchronization object, or NULL.
- ) {
+void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
+ kmp_uint32 (*pred)(void *, kmp_uint32),
+ void *obj // Higher-level synchronization object, or NULL.
+ ) {
// note: we may not belong to a team at this point
void *spin = spinner;
kmp_uint32 check = checker;
// main wait spin loop
while (!f(spin, check)) {
KMP_FSYNC_SPIN_PREPARE(obj);
/* if we have waited a bit, or are oversubscribed, yield */
/* pause is in the following code */
- KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
- KMP_YIELD_SPIN(spins);
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
}
KMP_FSYNC_SPIN_ACQUIRED(obj);
}
}
/*
- Spin wait loop that first does pause, then yield.
+ Spin wait loop that pauses between checks.
Waits until function returns non-zero when called with *spinner and check.
Does NOT put threads to sleep.
Arguments:
is used to report locks consistently. For example, if a lock is acquired
immediately, its address is reported to ittnotify via
KMP_FSYNC_ACQUIRED(). However, if the lock cannot be acquired immediately
- and lock routine calls to KMP_WAIT_YIELD(), the later should report the
+ and the lock routine calls KMP_WAIT(), the latter should report the
same address, not the address of the low-level spinner.
#endif // USE_ITT_BUILD
TODO: make inline function (move to header file for icl)
*/
template <typename UT>
-static UT __kmp_wait_yield(volatile UT *spinner, UT checker,
- kmp_uint32 (*pred)(UT, UT)
- USE_ITT_BUILD_ARG(void *obj)) {
+static UT __kmp_wait(volatile UT *spinner, UT checker,
+ kmp_uint32 (*pred)(UT, UT) USE_ITT_BUILD_ARG(void *obj)) {
// note: we may not belong to a team at this point
volatile UT *spin = spinner;
UT check = checker;
It causes problems with infinite recursion because of exit lock */
/* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
__kmp_abort_thread(); */
-
- // if we are oversubscribed,
- // or have waited a bit (and KMP_LIBRARY=throughput, then yield
- // pause is in the following code
- KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
- KMP_YIELD_SPIN(spins);
+ // If oversubscribed, or have waited a bit, then yield.
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
}
KMP_FSYNC_SPIN_ACQUIRED(obj);
return r;
__kmp_str_free(&buff);
}
#endif
- __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
- __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
+ __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
+ __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
KMP_MB(); /* is this necessary? */
#ifdef KMP_DEBUG
{
next_wait_value));
char v = (current_wait_value ? 0x1 : 0x0);
(RCAST(volatile char *, &(bdata->val[current_index])))[id] = v;
- __kmp_wait_yield<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
- __kmp_eq<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
+ __kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
+ __kmp_eq<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
tdata->wait_val[current_index] = next_wait_value;
tdata->index = next_index;
}
next_wait_value));
val = RCAST(volatile kmp_int64 *, &(bdata->val[current_index]));
KMP_TEST_THEN_INC64(val);
- __kmp_wait_yield<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
- __kmp_ge<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
+ __kmp_wait<kmp_uint64>(&(bdata->val[current_index]), current_wait_value,
+ __kmp_ge<kmp_uint64> USE_ITT_BUILD_ARG(NULL));
tdata->wait_val[current_index] = next_wait_value;
tdata->index = next_index;
}
std::atomic<kmp_int32> __kmp_team_counter = ATOMIC_VAR_INIT(0);
std::atomic<kmp_int32> __kmp_task_counter = ATOMIC_VAR_INIT(0);
-unsigned int __kmp_init_wait =
- KMP_DEFAULT_INIT_WAIT; /* initial number of spin-tests */
-unsigned int __kmp_next_wait =
- KMP_DEFAULT_NEXT_WAIT; /* susequent number of spin-tests */
-
size_t __kmp_stksize = KMP_DEFAULT_STKSIZE;
#if KMP_USE_MONITOR
size_t __kmp_monitor_stksize = 0; // auto adjust
int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */
int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? */
+// From KMP_USE_YIELD:
+// 0 = never yield;
+// 1 = always yield (default);
+// 2 = yield only if oversubscribed
+kmp_int32 __kmp_use_yield = 1;
+// This will be 1 if the KMP_USE_YIELD environment variable was set explicitly
+kmp_int32 __kmp_use_yield_exp_set = 0;
+
kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT;
kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT;
-#if KMP_USE_MONITOR
-kmp_uint32 __kmp_yielding_on = 1;
-#endif
-#if KMP_OS_CNK
-kmp_uint32 __kmp_yield_cycle = 0;
-#else
-kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */
-#endif
-kmp_int32 __kmp_yield_on_count =
- 10; /* By default, yielding is on for 10 monitor periods. */
-kmp_int32 __kmp_yield_off_count =
- 1; /* By default, yielding is off for 1 monitor periods. */
-
/* ------------------------------------------------------ */
/* STATE mostly synchronized with global lock */
/* data written to rarely by masters, read often by workers */
with a delay (and not called at all if waiting time is small). So, in spin
loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before
spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
- KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT_YIELD() for example. */
+ KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT() for an example. */
#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT(obj, spin) \
kmp_uint32 spins;
KMP_FSYNC_PREPARE(lck);
KMP_INIT_YIELD(spins);
- if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
- KMP_YIELD(TRUE);
- } else {
- KMP_YIELD_SPIN(spins);
- }
-
kmp_backoff_t backoff = __kmp_spin_backoff_params;
- while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
- !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
+ do {
__kmp_spin_backoff(&backoff);
- if (TCR_4(__kmp_nth) >
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
- KMP_YIELD(TRUE);
- } else {
- KMP_YIELD_SPIN(spins);
- }
- }
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+ } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
+ !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
KMP_FSYNC_ACQUIRED(lck);
return KMP_LOCK_ACQUIRED_FIRST;
}
KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
KMP_MB(); /* Flush all pending memory write invalidates. */
- KMP_YIELD(TCR_4(__kmp_nth) >
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+ KMP_YIELD_OVERSUB();
return KMP_LOCK_RELEASED;
}
KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
lck->lk.poll, gtid));
- KMP_YIELD(TCR_4(__kmp_nth) >
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+ KMP_YIELD_OVERSUB();
return KMP_LOCK_RELEASED;
}
std::memory_order_acquire) == my_ticket) {
return KMP_LOCK_ACQUIRED_FIRST;
}
- KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
+ KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
return KMP_LOCK_ACQUIRED_FIRST;
}
("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
lck, gtid));
- /* ToDo: May want to consider using __kmp_wait_sleep or something that
- sleeps for throughput only here. */
KMP_MB();
- KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
+ // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
+ KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK(gtid + 1, "acq spin");
/* Yield if number of threads > number of logical processors */
/* ToDo: Not sure why this should only be in the oversubscription case;
maybe it should be a traditional KMP_INIT_YIELD/KMP_YIELD_SPIN loop */
- KMP_YIELD(TCR_4(__kmp_nth) >
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+ KMP_YIELD_OVERSUB();
+
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK(gtid + 1, "acq retry");
#endif
KMP_MB();
/* make sure enqueuing thread has time to update next waiting thread
* field */
- *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0,
- KMP_NEQ, NULL);
+ *head_id_p =
+ KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
#endif
// lock from now on.
while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
KMP_INC_STAT(lck, lemmingYields);
- __kmp_yield(TRUE);
+ KMP_YIELD(TRUE);
}
if (__kmp_test_adaptive_lock_only(lck, gtid))
// polling area has been reconfigured. Unless it is reconfigured, the
// reloads stay in L1 cache and are cheap.
//
- // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
- //
- // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
+ // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
+ // The current implementation of KMP_WAIT doesn't allow for mask
// and poll to be re-read every spin iteration.
kmp_uint32 spins;
-
KMP_FSYNC_PREPARE(lck);
KMP_INIT_YIELD(spins);
while (polls[ticket & mask] < ticket) { // atomic load
- // If we are oversubscribed,
- // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
- // CPU Pause is in the macros for yield.
- //
- KMP_YIELD(TCR_4(__kmp_nth) >
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
- KMP_YIELD_SPIN(spins);
-
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
// Re-read the mask and the poll pointer from the lock structure.
//
// Make certain that "mask" is read before "polls" !!!
}
if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
// Wait until lock becomes free
- while (!__kmp_is_unlocked_queuing_lock(lck))
- __kmp_yield(TRUE);
+ while (!__kmp_is_unlocked_queuing_lock(lck)) {
+ KMP_YIELD(TRUE);
+ }
} else if (!(status & _XABORT_RETRY))
break;
} while (retries--);
kmp_uint32 spins; \
KMP_FSYNC_PREPARE(lck); \
KMP_INIT_YIELD(spins); \
- if (TCR_4(__kmp_nth) > \
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
- KMP_YIELD(TRUE); \
- } else { \
- KMP_YIELD_SPIN(spins); \
- } \
- while (lck->tas.lk.poll != 0 || !__kmp_atomic_compare_store_acq( \
- &lck->tas.lk.poll, 0, gtid + 1)) { \
- if (TCR_4(__kmp_nth) > \
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
- KMP_YIELD(TRUE); \
- } else { \
- KMP_YIELD_SPIN(spins); \
- } \
- } \
+ do { \
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins); \
+ } while ( \
+ lck->tas.lk.poll != 0 || \
+ !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
} \
KMP_FSYNC_ACQUIRED(lck); \
} else { \
kmp_uint32 spins; \
KMP_FSYNC_PREPARE(lck); \
KMP_INIT_YIELD(spins); \
- if (TCR_4(__kmp_nth) > \
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
- KMP_YIELD(TRUE); \
- } else { \
- KMP_YIELD_SPIN(spins); \
- } \
- while ( \
+ do { \
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins); \
+ } while ( \
(lck->tas.lk.poll != 0) || \
- !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
- if (TCR_4(__kmp_nth) > \
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
- KMP_YIELD(TRUE); \
- } else { \
- KMP_YIELD_SPIN(spins); \
- } \
- } \
+ !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
} \
lck->tas.lk.depth_locked = 1; \
*depth = KMP_LOCK_ACQUIRED_FIRST; \
#define KMP_CACHE_PREFETCH(ADDR) /* nothing */
// Define attribute that indicates that the fall through from the previous
// case label is intentional and should not be diagnosed by a compiler
// Code from libcxx/include/__config
// Use a function like macro to imply that it must be followed by a semicolon
#define VOLATILE_CAST(x) (x)
#endif
-#define KMP_WAIT_YIELD __kmp_wait_yield_4
-#define KMP_WAIT_YIELD_PTR __kmp_wait_yield_4_ptr
+#define KMP_WAIT __kmp_wait_4
+#define KMP_WAIT_PTR __kmp_wait_4_ptr
#define KMP_EQ __kmp_eq_4
#define KMP_NEQ __kmp_neq_4
#define KMP_LT __kmp_lt_4
static int done = FALSE;
while (!done) {
- KMP_YIELD(1);
+ KMP_YIELD(TRUE);
}
}
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */
-/* Change the library type to "status" and return the old type */
-/* called from within initialization routines where __kmp_initz_lock is held */
-int __kmp_change_library(int status) {
- int old_status;
-
- old_status = __kmp_yield_init &
- 1; // check whether KMP_LIBRARY=throughput (even init count)
-
- if (status) {
- __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
- } else {
- __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
- }
-
- return old_status; // return previous setting of whether
- // KMP_LIBRARY=throughput
-}
-
/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
if (!team->t.t_serialized) {
KMP_MB();
- KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
- KMP_EQ, NULL);
+ KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
+ NULL);
KMP_MB();
}
#endif /* BUILD_PARALLEL_ORDERED */
switch (__kmp_library) {
case library_serial: {
KMP_INFORM(LibraryIsSerial);
- (void)__kmp_change_library(TRUE);
} break;
case library_turnaround:
- (void)__kmp_change_library(TRUE);
+ if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
+ __kmp_use_yield = 2; // only yield when oversubscribed
break;
case library_throughput:
- (void)__kmp_change_library(FALSE);
+ if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
+ __kmp_dflt_blocktime = 200;
break;
default:
KMP_FATAL(UnknownLibraryType, arg);
} // __kmp_stg_print_teams_thread_limit
// -----------------------------------------------------------------------------
+// KMP_USE_YIELD
+static void __kmp_stg_parse_use_yield(char const *name, char const *value,
+ void *data) {
+ __kmp_stg_parse_int(name, value, 0, 2, &__kmp_use_yield);
+ __kmp_use_yield_exp_set = 1;
+} // __kmp_stg_parse_use_yield
+
+static void __kmp_stg_print_use_yield(kmp_str_buf_t *buffer, char const *name,
+ void *data) {
+ __kmp_stg_print_int(buffer, name, __kmp_use_yield);
+} // __kmp_stg_print_use_yield
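
As a self-contained illustration of the new knob's range and default (a toy model; the real parser goes through __kmp_stg_parse_int, which also clamps and warns):

#include <stdio.h>
#include <stdlib.h>

// Toy model of KMP_USE_YIELD parsing: accept 0..2, clamp out-of-range
// values, and default to 1 (always yield) when the variable is unset.
static int parse_use_yield(const char *value) {
  long v = value ? strtol(value, NULL, 10) : 1;
  if (v < 0) v = 0;
  if (v > 2) v = 2;
  return (int)v;
}

int main(void) {
  printf("effective KMP_USE_YIELD = %d\n",
         parse_use_yield(getenv("KMP_USE_YIELD")));
  return 0;
}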
+
+// -----------------------------------------------------------------------------
// KMP_BLOCKTIME
static void __kmp_stg_parse_blocktime(char const *name, char const *value,
__kmp_library = library_serial;
} else if (__kmp_str_match("throughput", 2, value)) { /* TH */
__kmp_library = library_throughput;
+ if (blocktime_str == NULL) {
+ // KMP_BLOCKTIME not specified, so set default to 0.
+ __kmp_dflt_blocktime = 0;
+ }
} else if (__kmp_str_match("turnaround", 2, value)) { /* TU */
__kmp_library = library_turnaround;
} else if (__kmp_str_match("dedicated", 1, value)) { /* D */
__kmp_library = library_turnaround;
} else if (__kmp_str_match("multiuser", 1, value)) { /* M */
__kmp_library = library_throughput;
+ if (blocktime_str == NULL) {
+ // KMP_BLOCKTIME not specified, so set default to 0.
+ __kmp_dflt_blocktime = 0;
+ }
} else {
KMP_WARNING(StgInvalidValue, name, value);
}
}
- __kmp_aux_set_library(__kmp_library);
-
} // __kmp_stg_parse_wait_policy
static void __kmp_stg_print_wait_policy(kmp_str_buf_t *buffer, char const *name,
}
} // __kmp_stg_print_par_range_env
-// -----------------------------------------------------------------------------
-// KMP_YIELD_CYCLE, KMP_YIELD_ON, KMP_YIELD_OFF
-
-static void __kmp_stg_parse_yield_cycle(char const *name, char const *value,
- void *data) {
- int flag = __kmp_yield_cycle;
- __kmp_stg_parse_bool(name, value, &flag);
- __kmp_yield_cycle = flag;
-} // __kmp_stg_parse_yield_cycle
-
-static void __kmp_stg_print_yield_cycle(kmp_str_buf_t *buffer, char const *name,
- void *data) {
- __kmp_stg_print_bool(buffer, name, __kmp_yield_cycle);
-} // __kmp_stg_print_yield_cycle
-
-static void __kmp_stg_parse_yield_on(char const *name, char const *value,
- void *data) {
- __kmp_stg_parse_int(name, value, 2, INT_MAX, &__kmp_yield_on_count);
-} // __kmp_stg_parse_yield_on
-
-static void __kmp_stg_print_yield_on(kmp_str_buf_t *buffer, char const *name,
- void *data) {
- __kmp_stg_print_int(buffer, name, __kmp_yield_on_count);
-} // __kmp_stg_print_yield_on
-
-static void __kmp_stg_parse_yield_off(char const *name, char const *value,
- void *data) {
- __kmp_stg_parse_int(name, value, 2, INT_MAX, &__kmp_yield_off_count);
-} // __kmp_stg_parse_yield_off
-
-static void __kmp_stg_print_yield_off(kmp_str_buf_t *buffer, char const *name,
- void *data) {
- __kmp_stg_print_int(buffer, name, __kmp_yield_off_count);
-} // __kmp_stg_print_yield_off
-
#endif
// -----------------------------------------------------------------------------
-// KMP_INIT_WAIT, KMP_NEXT_WAIT
-
-static void __kmp_stg_parse_init_wait(char const *name, char const *value,
- void *data) {
- int wait;
- KMP_ASSERT((__kmp_init_wait & 1) == 0);
- wait = __kmp_init_wait / 2;
- __kmp_stg_parse_int(name, value, KMP_MIN_INIT_WAIT, KMP_MAX_INIT_WAIT, &wait);
- __kmp_init_wait = wait * 2;
- KMP_ASSERT((__kmp_init_wait & 1) == 0);
- __kmp_yield_init = __kmp_init_wait;
-} // __kmp_stg_parse_init_wait
-
-static void __kmp_stg_print_init_wait(kmp_str_buf_t *buffer, char const *name,
- void *data) {
- __kmp_stg_print_int(buffer, name, __kmp_init_wait);
-} // __kmp_stg_print_init_wait
-
-static void __kmp_stg_parse_next_wait(char const *name, char const *value,
- void *data) {
- int wait;
- KMP_ASSERT((__kmp_next_wait & 1) == 0);
- wait = __kmp_next_wait / 2;
- __kmp_stg_parse_int(name, value, KMP_MIN_NEXT_WAIT, KMP_MAX_NEXT_WAIT, &wait);
- __kmp_next_wait = wait * 2;
- KMP_ASSERT((__kmp_next_wait & 1) == 0);
- __kmp_yield_next = __kmp_next_wait;
-} // __kmp_stg_parse_next_wait
-
-static void __kmp_stg_print_next_wait(kmp_str_buf_t *buffer, char const *name,
- void *data) {
- __kmp_stg_print_int(buffer, name, __kmp_next_wait);
-} //__kmp_stg_print_next_wait
-
-// -----------------------------------------------------------------------------
// KMP_GTID_MODE
static void __kmp_stg_parse_gtid_mode(char const *name, char const *value,
{"KMP_ALL_THREADS", __kmp_stg_parse_device_thread_limit, NULL, NULL, 0, 0},
{"KMP_BLOCKTIME", __kmp_stg_parse_blocktime, __kmp_stg_print_blocktime,
NULL, 0, 0},
+ {"KMP_USE_YIELD", __kmp_stg_parse_use_yield, __kmp_stg_print_use_yield,
+ NULL, 0, 0},
{"KMP_DUPLICATE_LIB_OK", __kmp_stg_parse_duplicate_lib_ok,
__kmp_stg_print_duplicate_lib_ok, NULL, 0, 0},
{"KMP_LIBRARY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy,
{"KMP_PAR_RANGE", __kmp_stg_parse_par_range_env,
__kmp_stg_print_par_range_env, NULL, 0, 0},
- {"KMP_YIELD_CYCLE", __kmp_stg_parse_yield_cycle,
- __kmp_stg_print_yield_cycle, NULL, 0, 0},
- {"KMP_YIELD_ON", __kmp_stg_parse_yield_on, __kmp_stg_print_yield_on, NULL,
- 0, 0},
- {"KMP_YIELD_OFF", __kmp_stg_parse_yield_off, __kmp_stg_print_yield_off,
- NULL, 0, 0},
#endif // KMP_DEBUG
{"KMP_ALIGN_ALLOC", __kmp_stg_parse_align_alloc,
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
{"KMP_MALLOC_POOL_INCR", __kmp_stg_parse_malloc_pool_incr,
__kmp_stg_print_malloc_pool_incr, NULL, 0, 0},
- {"KMP_INIT_WAIT", __kmp_stg_parse_init_wait, __kmp_stg_print_init_wait,
- NULL, 0, 0},
- {"KMP_NEXT_WAIT", __kmp_stg_parse_next_wait, __kmp_stg_print_next_wait,
- NULL, 0, 0},
{"KMP_GTID_MODE", __kmp_stg_parse_gtid_mode, __kmp_stg_print_gtid_mode,
NULL, 0, 0},
{"OMP_DYNAMIC", __kmp_stg_parse_omp_dynamic, __kmp_stg_print_omp_dynamic,
if (thread->th.th_task_team == NULL) {
break;
}
- // Yield before executing next task
- KMP_YIELD(__kmp_library == library_throughput);
+ KMP_YIELD(__kmp_library == library_throughput); // Yield before next task
// If execution of a stolen task results in more tasks being placed on our
// run queue, reset use_own_tasks
if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
break;
}
- // If we are oversubscribed, or have waited a bit (and library mode is
- // throughput), yield. Pause is in the following code.
- KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
- KMP_YIELD_SPIN(spins); // Yields only if KMP_LIBRARY=throughput
+ // If oversubscribed or have waited a bit, yield.
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
}
}
__kmp_abort_thread();
break;
}
- KMP_YIELD(TRUE); // GH: We always yield here
+ KMP_YIELD(TRUE);
}
#if USE_ITT_BUILD
KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
- KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
+ KMP_WAIT(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
KMP_MB();
}
}
taskq = thunk->th.th_shareds->sv_queue;
if (taskq->tq_tasknum_serving <= my_token) {
- KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
+ KMP_WAIT(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
KMP_MB();
taskq->tq_tasknum_serving = my_token + 1;
KMP_MB();
while (queue->tq_ref_count > 1) {
__kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
- KMP_WAIT_YIELD((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE,
- NULL);
+ KMP_WAIT((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE, NULL);
__kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
// Make sure data structures are in consistent state before querying them
in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
if (in_parallel) {
- kmp_uint32 spins;
-
/* this is just a safeguard to release the waiting threads if */
/* the outermost taskq never queues a task */
do {
/* wait until something is available to dequeue */
- KMP_INIT_YIELD(spins);
-
while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL) &&
(!__kmp_taskq_has_any_children(queue)) &&
(!(queue->tq_flags & TQF_ALL_TASKS_QUEUED))) {
- KMP_YIELD_WHEN(TRUE, spins);
+ KMP_CPU_PAUSE();
}
/* check to see if we can execute tasks in the queue */
/* WAIT until all tasks are finished and no child queues exist before
* proceeding */
- KMP_INIT_YIELD(spins);
while (!__kmp_taskq_tasks_finished(queue) ||
__kmp_taskq_has_any_children(queue)) {
in_parallel);
}
- KMP_YIELD_WHEN(thunk == NULL, spins);
+ if (thunk == NULL)
+ KMP_CPU_PAUSE();
__kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
}
// Outermost Queue: steal work from descendants until all tasks are finished
- KMP_INIT_YIELD(spins);
-
while (!__kmp_taskq_tasks_finished(queue)) {
thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);
__kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
}
- KMP_YIELD_WHEN(thunk == NULL, spins);
+ if (thunk == NULL)
+ KMP_CPU_PAUSE();
}
/* Need this barrier to prevent destruction of queue before threads have all
}
#endif
-/* Spin wait loop that first does pause, then yield, then sleep. A thread that
- calls __kmp_wait_* must make certain that another thread calls __kmp_release
+/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
+ __kmp_wait_* must make certain that another thread calls __kmp_release
to wake it back up to prevent deadlocks!
NOTE: We may not belong to a team at this point. */
}
#endif
- // Setup for waiting
- KMP_INIT_YIELD(spins);
+ KMP_INIT_YIELD(spins); // Setup for waiting
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
#if OMP_50_ENABLED
// If we are oversubscribed, or have waited a bit (and
// __kmp_use_yield == 1), then yield.
- // TODO: Should it be number of cores instead of thread contexts? Like:
- // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
- // Need performance improvement data to make the change...
- if (oversubscribed) {
- KMP_YIELD(1);
- } else {
- KMP_YIELD_SPIN(spins);
- }
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+
// Check if this thread was transferred from a team
// to the thread pool (or vice-versa) while spinning.
in_pool = !!TCR_4(this_thr->th.th_in_pool);
__kmp_msg_null);
}
#endif
- __kmp_yield(TRUE);
+ KMP_YIELD(TRUE);
} //
/* Set thread stack info according to values returned by pthread_getattr_np().
sigset_t new_set;
#endif /* KMP_BLOCK_SIGNALS */
struct timespec interval;
- int yield_count;
- int yield_cycles = 0;
KMP_MB(); /* Flush all pending memory write invalidates. */
KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n"));
- if (__kmp_yield_cycle) {
- __kmp_yielding_on = 0; /* Start out with yielding shut off */
- yield_count = __kmp_yield_off_count;
- } else {
- __kmp_yielding_on = 1; /* Yielding is on permanently */
- }
-
while (!TCR_4(__kmp_global.g.g_done)) {
struct timespec now;
struct timeval tval;
status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
- if (__kmp_yield_cycle) {
- yield_cycles++;
- if ((yield_cycles % yield_count) == 0) {
- if (__kmp_yielding_on) {
- __kmp_yielding_on = 0; /* Turn it off now */
- yield_count = __kmp_yield_off_count;
- } else {
- __kmp_yielding_on = 1; /* Turn it on now */
- yield_count = __kmp_yield_on_count;
- }
- yield_cycles = 0;
- }
- } else {
- __kmp_yielding_on = 1;
- }
-
TCW_4(__kmp_global.g.g_time.dt.t_value,
TCR_4(__kmp_global.g.g_time.dt.t_value) + 1);
// Wait for the monitor thread is really started and set its *priority*.
KMP_DEBUG_ASSERT(sizeof(kmp_uint32) ==
sizeof(__kmp_global.g.g_time.dt.t_value));
- __kmp_wait_yield_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value,
- -1, &__kmp_neq_4, NULL);
+ __kmp_wait_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value, -1,
+ &__kmp_neq_4, NULL);
#endif // KMP_REAL_TIME_FIX
#ifdef KMP_THREAD_ATTR
}
#endif // KMP_USE_MONITOR
-void __kmp_yield(int cond) {
- if (!cond)
- return;
-#if KMP_USE_MONITOR
- if (!__kmp_yielding_on)
- return;
-#else
- if (__kmp_yield_cycle && !KMP_YIELD_NOW())
- return;
-#endif
- sched_yield();
-}
+void __kmp_yield() { sched_yield(); }
void __kmp_gtid_set_specific(int gtid) {
if (__kmp_init_gtid) {
__kmp_resume_template(target_gtid, flag);
}
-void __kmp_yield(int cond) {
- if (cond)
- Sleep(0);
-}
+void __kmp_yield() { Sleep(0); }
void __kmp_gtid_set_specific(int gtid) {
if (__kmp_init_gtid) {
Right solution seems to be waiting for *either* thread termination *or*
ds_alive resetting. */
{
- // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize
- // KMP_WAIT_YIELD to cover this usage also.
+ // TODO: This code is very similar to KMP_WAIT. Need to generalize
+ // KMP_WAIT to cover this usage also.
void *obj = NULL;
kmp_uint32 spins;
#if USE_ITT_BUILD
KMP_FSYNC_SPIN_PREPARE(obj);
#endif /* USE_ITT_BUILD */
__kmp_is_thread_alive(th, &exit_val);
- KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
- KMP_YIELD_SPIN(spins);
+ KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
} while (exit_val == STILL_ACTIVE && TCR_4(th->th.th_info.ds.ds_alive));
#if USE_ITT_BUILD
if (exit_val == STILL_ACTIVE) {