// Process schedule.
if (tnum == 1 || tripCount <= 1 || OrderedSchedule(schedule)) {
- if (OrderedSchedule(schedule)) {
- if (isSPMDMode())
- __syncthreads();
- else
- __kmpc_barrier(loc, threadId);
- }
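+ // __kmpc_barrier selects the SPMD or generic barrier internally.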
+ if (OrderedSchedule(schedule))
+ __kmpc_barrier(loc, threadId);
PRINT(LD_LOOP,
"go sequential as tnum=%ld, trip count %lld, ordered sched=%d\n",
(long)tnum, P64(tripCount), schedule);
} else if (schedule == kmp_sched_dynamic || schedule == kmp_sched_guided) {
- if (isSPMDMode())
- __syncthreads();
- else
- __kmpc_barrier(loc, threadId);
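+ // All threads synchronize before the team-wide bounds below are (re)written.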
+ __kmpc_barrier(loc, threadId);
// save sched state
int teamId = GetOmpTeamId();
omptarget_nvptx_threadPrivateContext->ScheduleType(tid) = schedule;
omptarget_nvptx_threadPrivateContext->LoopUpperBound(teamId) = ub;
omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId) = lb;
}
- if (isSPMDMode())
- __syncthreads();
- else
- __kmpc_barrier(loc, threadId);
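+ // Synchronize again so every thread sees the saved bounds before dispatching.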
+ __kmpc_barrier(loc, threadId);
PRINT(LD_LOOP,
"dispatch init (dyn) : num threads = %d, lb = %llu, ub = %" PRId64
", chunk %" PRIu64 "\n",
if (nt != blockDim.x)
return 0;
unsigned tnum = __ACTIVEMASK();
- if (tnum != (~0x0)) { // assume swapSize is 32
+ if (tnum != (~0x0)) // assume warpSize is 32
return 0;
- }
return 1;
}
if (numthread == 1)
return 1;
- else if (!__gpu_block_reduce())
+ if (!__gpu_block_reduce())
return 2;
- else {
- if (threadIdx.x == 0)
- return 1;
- else
- return 0;
- }
+ if (threadIdx.x == 0)
+ return 1;
+ return 0;
}
EXTERN
int32_t __kmpc_reduce_combined(kmp_Indent *loc) {
- if (threadIdx.x == 0) {
- return 2;
- } else {
- return 0;
- }
+ return threadIdx.x == 0 ? 2 : 0;
}
EXTERN
int32_t __kmpc_reduce_simd(kmp_Indent *loc) {
- if (threadIdx.x % 32 == 0) {
- return 1;
- } else {
- return 0;
- }
+ return (threadIdx.x % 32 == 0) ? 1 : 0;
}
EXTERN
}
EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) {
- if (isSPMDMode()) {
- __kmpc_barrier_simple_spmd(loc_ref, tid);
- } else if (isRuntimeUninitialized()) {
- __kmpc_barrier_simple_generic(loc_ref, tid);
+ if (isRuntimeUninitialized()) {
+ if (isSPMDMode())
+ __kmpc_barrier_simple_spmd(loc_ref, tid);
+ else
+ __kmpc_barrier_simple_generic(loc_ref, tid);
} else {
tid = GetLogicalThreadIdInBlock();
- omptarget_nvptx_TaskDescr *currTaskDescr =
-     omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(tid);
- if (!currTaskDescr->InL2OrHigherParallelRegion()) {
- int numberOfActiveOMPThreads =
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized());
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
- // On Volta and newer architectures we require that all lanes in
- // a warp (at least, all present for the kernel launch) participate in the
- // barrier. This is enforced when launching the parallel region. An
- // exception is when there are < WARPSIZE workers. In this case only 1
- // worker is started, so we don't need a barrier.
- if (numberOfActiveOMPThreads > 1) {
-#endif
+ int numberOfActiveOMPThreads = GetNumberOfOmpThreads(
+ tid, isSPMDMode(), /*isRuntimeUninitialized=*/false);
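+ // With fewer than WARPSIZE workers only a single worker is started, so the
+ // barrier is needed only when more than one OpenMP thread is active.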
+ if (numberOfActiveOMPThreads > 1) {
+ if (isSPMDMode()) {
+ __kmpc_barrier_simple_spmd(loc_ref, tid);
+ } else {
// The #threads parameter must be rounded up to the WARPSIZE.
int threads =
WARPSIZE * ((numberOfActiveOMPThreads + WARPSIZE - 1) / WARPSIZE);
PRINT(LD_SYNC,
      "call kmpc_barrier with %d omp threads, sync parameter %d\n",
      numberOfActiveOMPThreads, threads);
// Barrier #1 is for synchronization among active threads.
named_sync(L1_BARRIER, threads);
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
- } // numberOfActiveOMPThreads > 1
-#endif
- }
+ }
+ } // numberOfActiveOMPThreads > 1
PRINT0(LD_SYNC, "completed kmpc_barrier\n");
}
}