[libomptarget] Refactor shfl_down_sync macro to inline function

author Jon Chesterfield <jonathanchesterfield@gmail.com>

Wed, 28 Aug 2019 01:47:41 +0000 (01:47 +0000)

committer Jon Chesterfield <jonathanchesterfield@gmail.com>

Wed, 28 Aug 2019 01:47:41 +0000 (01:47 +0000)
author Jon Chesterfield <jonathanchesterfield@gmail.com>
Wed, 28 Aug 2019 01:47:41 +0000 (01:47 +0000)
committer Jon Chesterfield <jonathanchesterfield@gmail.com>
Wed, 28 Aug 2019 01:47:41 +0000 (01:47 +0000)
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h

index 9fcd1a925834e4abef9c91a448a94ee695a284f6..0a0c6cc4334c46f6cb7662174358c6d308afa2ad 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -51,13 +51,9 @@
  #ifndef CUDA_VERSION
  #error CUDA_VERSION macro is undefined, something wrong with cuda.
  #elif CUDA_VERSION >= 9000
-#define __SHFL_DOWN_SYNC(mask, var, delta, width)                              \
-  __shfl_down_sync((mask), (var), (delta), (width))
  #define __ACTIVEMASK() __activemask()
  #define __SYNCWARP(Mask) __syncwarp(Mask)
  #else
-#define __SHFL_DOWN_SYNC(mask, var, delta, width)                              \
-  __shfl_down((var), (delta), (width))
  #define __ACTIVEMASK() __ballot(1)
  // In Cuda < 9.0 no need to sync threads in warps.
  #define __SYNCWARP(Mask)
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu

index c9256383487854d9d060378d7fc7876375a5e675..e5e76d553117e08194f1f0ca87002ed127ae1825 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -15,6 +15,7 @@
  #include <stdio.h>
  
  #include "omptarget-nvptx.h"
+#include "target_impl.h"
  
  EXTERN
  void __kmpc_nvptx_end_reduce(int32_t global_tid) {}
@@ -23,14 +24,14 @@ EXTERN
  void __kmpc_nvptx_end_reduce_nowait(int32_t global_tid) {}
  
  EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) {
-  return __SHFL_DOWN_SYNC(0xFFFFFFFF, val, delta, size);
+  return __kmpc_impl_shfl_down_sync(0xFFFFFFFF, val, delta, size);
  }
  
  EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) {
     int lo, hi;
     asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
-   hi = __SHFL_DOWN_SYNC(0xFFFFFFFF, hi, delta, size);
-   lo = __SHFL_DOWN_SYNC(0xFFFFFFFF, lo, delta, size);
+   hi = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, hi, delta, size);
+   lo = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, lo, delta, size);
     asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
     return val;
  }
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

index caa9feafe037c220dfab49acfecbadae5adfcc2e..0f548289f9c3b7756612453e9b38306619e07c9a 100644 (file)
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -43,6 +43,7 @@ INLINE int __kmpc_impl_popc(uint32_t x) { return __popc(x); }
  #endif
  
  // In Cuda 9.0, the *_sync() version takes an extra argument 'mask'.
+
  INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var,
                                       int32_t SrcLane) {
  #if CUDA_VERSION >= 9000
@@ -50,6 +51,15 @@ INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var,
  #else
    return __shfl(Var, SrcLane);
  #endif // CUDA_VERSION
+}
+INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask,
+                                          int32_t Var, uint32_t Delta,
+                                          int32_t Width) {
+#if CUDA_VERSION >= 9000
+  return __shfl_down_sync(Mask, Var, Delta, Width);
+#else
+  return __shfl_down(Var, Delta, Width);
+#endif // CUDA_VERSION
  }
  
  INLINE void __kmpc_impl_syncwarp(int32_t Mask) { __SYNCWARP(Mask); }
author	Jon Chesterfield <jonathanchesterfield@gmail.com>
	Wed, 28 Aug 2019 01:47:41 +0000 (01:47 +0000)
committer	Jon Chesterfield <jonathanchesterfield@gmail.com>
	Wed, 28 Aug 2019 01:47:41 +0000 (01:47 +0000)
openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h		patch \| blob \| history
openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu		patch \| blob \| history
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h		patch \| blob \| history