[nfc][libomptarget] Move named_sync() into target_impl

author Jon Chesterfield <jonathanchesterfield@gmail.com>

Wed, 30 Oct 2019 13:39:49 +0000 (13:39 +0000)

committer JonChesterfield <jonathanchesterfield@gmail.com>

Wed, 30 Oct 2019 16:25:05 +0000 (16:25 +0000)
author Jon Chesterfield <jonathanchesterfield@gmail.com>
Wed, 30 Oct 2019 13:39:49 +0000 (13:39 +0000)
committer JonChesterfield <jonathanchesterfield@gmail.com>
Wed, 30 Oct 2019 16:25:05 +0000 (16:25 +0000)
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu

index 39dfebd..3eff3a1 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
@@ -765,7 +765,7 @@ INLINE void syncWorkersInGenericMode(uint32_t NumThreads) {
    // is started, so we don't need a barrier.
    if (NumThreads > 1) {
  #endif
-    named_sync(L1_BARRIER, WARPSIZE * NumWarps);
+    __kmpc_impl_named_sync(L1_BARRIER, WARPSIZE * NumWarps);
  #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
    }
  #endif
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu

index cee3e5d..da7204d 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -256,7 +256,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
    // If we guard this barrier as follows it leads to deadlock, probably
    // because of a compiler bug: if (!IsGenericMode()) __syncthreads();
    uint16_t SyncWarps = (NumThreads + WARPSIZE - 1) / WARPSIZE;
-  named_sync(L1_BARRIER, SyncWarps * WARPSIZE);
+  __kmpc_impl_named_sync(L1_BARRIER, SyncWarps * WARPSIZE);
  
    // If this team is not the last, quit.
    if (/* Volatile read by all threads */ !IsLastTeam)
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h b/openmp/libomptarget/deviceRTLs/nvptx/src/support.h

index e10f2a1..de685b8 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.h
@@ -84,11 +84,6 @@ INLINE unsigned long PadBytes(unsigned long size, unsigned long alignment);
    ((void *)((char *)((void *)(_addr)) - (_bytes)))
  
  ////////////////////////////////////////////////////////////////////////////////
-// Named Barrier Routines
-////////////////////////////////////////////////////////////////////////////////
-INLINE void named_sync(const int barrier, const int num_threads);
-
-////////////////////////////////////////////////////////////////////////////////
  // Teams Reduction Scratchpad Helpers
  ////////////////////////////////////////////////////////////////////////////////
  INLINE unsigned int *GetTeamsReductionTimestamp();
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h

index d4da6ad..6fa8578 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
@@ -269,17 +269,6 @@ INLINE void *SafeFree(void *ptr, const char *msg) {
  }
  
  ////////////////////////////////////////////////////////////////////////////////
-// Named Barrier Routines
-////////////////////////////////////////////////////////////////////////////////
-
-INLINE void named_sync(const int barrier, const int num_threads) {
-  asm volatile("bar.sync %0, %1;"
-               :
-               : "r"(barrier), "r"(num_threads)
-               : "memory");
-}
-
-////////////////////////////////////////////////////////////////////////////////
  // Teams Reduction Scratchpad Helpers
  ////////////////////////////////////////////////////////////////////////////////
  
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu

index 28a5419..f36e877 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -61,7 +61,7 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
                "call kmpc_barrier with %d omp threads, sync parameter %d\n",
                (int)numberOfActiveOMPThreads, (int)threads);
          // Barrier #1 is for synchronization among active threads.
-        named_sync(L1_BARRIER, threads);
+        __kmpc_impl_named_sync(L1_BARRIER, threads);
        }
      } else {
        // Still need to flush the memory per the standard.
@@ -92,7 +92,7 @@ EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) {
          "%d\n",
          (int)numberOfActiveOMPThreads, (int)threads);
    // Barrier #1 is for synchronization among active threads.
-  named_sync(L1_BARRIER, threads);
+  __kmpc_impl_named_sync(L1_BARRIER, threads);
    PRINT0(LD_SYNC, "completed kmpc_barrier_simple_generic\n");
  }
  
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

index bbce9f1..95fe2ad 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -153,4 +153,11 @@ INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
  #endif // CUDA_VERSION
  }
  
+INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
+  asm volatile("bar.sync %0, %1;"
+               :
+               : "r"(barrier), "r"(num_threads)
+               : "memory");
+}
+
  #endif
author	Jon Chesterfield <jonathanchesterfield@gmail.com>
	Wed, 30 Oct 2019 13:39:49 +0000 (13:39 +0000)
committer	JonChesterfield <jonathanchesterfield@gmail.com>
	Wed, 30 Oct 2019 16:25:05 +0000 (16:25 +0000)
openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu		patch | blob | history
openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu		patch | blob | history
openmp/libomptarget/deviceRTLs/nvptx/src/support.h		patch | blob | history
openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h		patch | blob | history
openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu		patch | blob | history
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h		patch | blob | history