[libomptarget][nfc] Use cuda variable wrappers from support.h

author JonChesterfield <jonathanchesterfield@gmail.com>

Thu, 14 Nov 2019 12:43:56 +0000 (12:43 +0000)

committer Jon Chesterfield <jonathanchesterfield@gmail.com>

Thu, 14 Nov 2019 12:45:09 +0000 (12:45 +0000)
author JonChesterfield <jonathanchesterfield@gmail.com>
Thu, 14 Nov 2019 12:43:56 +0000 (12:43 +0000)
committer Jon Chesterfield <jonathanchesterfield@gmail.com>
Thu, 14 Nov 2019 12:45:09 +0000 (12:45 +0000)
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu

index 78b04ec5cffe4fe834b6f2915faef44c6bdb8172..f2892acb790acf8f7e3edf9ef79749c11a6a229a 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -13,11 +13,6 @@
  #include "target_impl.h"
  #include <stdio.h>
  
-// Warp ID in the CUDA block
-INLINE static unsigned getWarpId() { return threadIdx.x / WARPSIZE; }
-// Lane ID in the CUDA warp.
-INLINE static unsigned getLaneId() { return threadIdx.x % WARPSIZE; }
-
  // Return true if this is the first active thread in the warp.
  INLINE static bool IsWarpMasterActiveThread() {
    unsigned long long Mask = __kmpc_impl_activemask();
@@ -67,7 +62,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS,
    DSPRINT0(DSFLAG_INIT,
             "Entering __kmpc_initialize_data_sharing_environment\n");
  
-  unsigned WID = getWarpId();
+  unsigned WID = GetWarpId();
    DSPRINT(DSFLAG_INIT, "Warp ID: %u\n", WID);
  
    omptarget_nvptx_TeamDescr *teamDescr =
@@ -111,7 +106,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
    DSPRINT(DSFLAG, "Default Data Size %016llx\n",
            (unsigned long long)SharingDefaultDataSize);
  
-  unsigned WID = getWarpId();
+  unsigned WID = GetWarpId();
    __kmpc_impl_lanemask_t CurActiveThreads = __kmpc_impl_activemask();
  
    __kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
@@ -231,7 +226,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
  
    DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_end\n");
  
-  unsigned WID = getWarpId();
+  unsigned WID = GetWarpId();
  
    if (IsEntryPoint) {
      if (IsWarpMasterActiveThread()) {
@@ -359,7 +354,7 @@ EXTERN void __kmpc_data_sharing_init_stack_spmd() {
    // This function initializes the stack pointer with the pointer to the
    // statically allocated shared memory slots. The size of a shared memory
    // slot is pre-determined to be 256 bytes.
-  if (threadIdx.x == 0)
+  if (GetThreadIdInBlock() == 0)
      data_sharing_init_stack_common();
  
    __threadfence_block();
@@ -377,7 +372,7 @@ INLINE static void* data_sharing_push_stack_common(size_t PushSize) {
    PushSize = (PushSize + (Alignment - 1)) / Alignment * Alignment;
  
    // Frame pointer must be visible to all workers in the same warp.
-  const unsigned WID = getWarpId();
+  const unsigned WID = GetWarpId();
    void *FrameP = 0;
    __kmpc_impl_lanemask_t CurActive = __kmpc_impl_activemask();
  
@@ -467,7 +462,7 @@ EXTERN void *__kmpc_data_sharing_push_stack(size_t DataSize,
    // Compute the start address of the frame of each thread in the warp.
    uintptr_t FrameStartAddress =
        (uintptr_t) data_sharing_push_stack_common(PushSize);
-  FrameStartAddress += (uintptr_t) (getLaneId() * DataSize);
+  FrameStartAddress += (uintptr_t) (GetLaneId() * DataSize);
    return (void *)FrameStartAddress;
  }
  
@@ -482,7 +477,7 @@ EXTERN void __kmpc_data_sharing_pop_stack(void *FrameStart) {
    __threadfence_block();
  
    if (GetThreadIdInBlock() % WARPSIZE == 0) {
-    unsigned WID = getWarpId();
+    unsigned WID = GetWarpId();
  
      // Current slot
      __kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h b/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h

index 1052392155a738ab26df529530d6249e9f75fb82..3388b04616f4f334212d5722f35fa0fe8665fe8c 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
@@ -128,12 +128,12 @@
  
  #if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING
  #include <stdio.h>
-#include "target_impl.h"
+#include "support.h"
  
  template <typename... Arguments>
  NOINLINE static void log(const char *fmt, Arguments... parameters) {
-  printf(fmt, (int)blockIdx.x, (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE),
-         (int)(threadIdx.x & 0x1F), parameters...);
+  printf(fmt, (int)GetBlockIdInKernel(), (int)GetThreadIdInBlock(),
+         (int)GetWarpId(), (int)GetLaneId(), parameters...);
  }
  
  #endif
@@ -144,9 +144,8 @@ template <typename... Arguments>
  NOINLINE static void check(bool cond, const char *fmt,
                             Arguments... parameters) {
    if (!cond)
-    printf(fmt, (int)blockIdx.x, (int)threadIdx.x,
-           (int)(threadIdx.x / WARPSIZE), (int)(threadIdx.x & 0x1F),
-           parameters...);
+    printf(fmt, (int)GetBlockIdInKernel(), (int)GetThreadIdInBlock(),
+           (int)GetWarpId(), (int)GetLaneId(), parameters...);
    assert(cond);
  }
  
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu

index 20a22f425324f32d1dec8effa6eb0441a06d15ab..e86b1d1fdbf309e57883a42ee82a8b5f479f207d 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -364,7 +364,7 @@ EXTERN void omp_set_lock(omp_lock_t *lock) {
      for (;;) {
        now = clock();
        clock_t cycles = now > start ? now - start : now + (0xffffffff - start);
-      if (cycles >= __OMP_SPIN * blockIdx.x) {
+      if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) {
          break;
        }
      }
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu

index a7aa8aa814ab70513b600e98070d1324d655d005..7a022e1680ff25211b3a0d2c2057dfc98eaa2bb6 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
@@ -106,9 +106,9 @@ DEVICE int GetNumberOfBlocksInKernel() { return gridDim.x; }
  
  DEVICE int GetNumberOfThreadsInBlock() { return blockDim.x; }
  
-DEVICE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; }
+DEVICE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
  
-DEVICE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
+DEVICE unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); }
  
  ////////////////////////////////////////////////////////////////////////////////
  //
@@ -124,7 +124,7 @@ DEVICE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
  //      If NumThreads is 1024, master id is 992.
  //
  // Called in Generic Execution Mode only.
-DEVICE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); }
+DEVICE int GetMasterThreadID() { return (GetNumberOfThreadsInBlock() - 1) & ~(WARPSIZE - 1); }
  
  // The last warp is reserved for the master; other warps are workers.
  // Called in Generic Execution Mode only.
author	JonChesterfield <jonathanchesterfield@gmail.com>
	Thu, 14 Nov 2019 12:43:56 +0000 (12:43 +0000)
committer	Jon Chesterfield <jonathanchesterfield@gmail.com>
	Thu, 14 Nov 2019 12:45:09 +0000 (12:45 +0000)
openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu		patch \| blob \| history
openmp/libomptarget/deviceRTLs/nvptx/src/debug.h		patch \| blob \| history
openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu		patch \| blob \| history
openmp/libomptarget/deviceRTLs/nvptx/src/support.cu		patch \| blob \| history