From 4f75a73796fff940fd30d38e3185c9e733ddbf2c Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Fri, 4 Oct 2019 21:39:22 +0000 Subject: [PATCH] Use named constant to indicate all lanes, to handle 32 and 64 wide architectures Summary: Use named constant to indicate all lanes, to handle 32 and 64 wide architectures Reviewers: ABataev, jdoerfert, grokos, ronlieb Reviewed By: grokos Subscribers: ronlieb, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D68369 llvm-svn: 373793 --- openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu | 4 ++-- openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu | 12 ++++++------ openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h | 2 ++ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu index 5db443c..24a235d 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -320,7 +320,7 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn, // can be changed incorrectly because of threads divergence. bool IsActiveParallelRegion = threadsInTeam != 1; IncParallelLevel(IsActiveParallelRegion, - IsActiveParallelRegion ? 0xFFFFFFFF : 1u); + IsActiveParallelRegion ? __kmpc_impl_all_lanes : 1u); } return isActive; @@ -347,7 +347,7 @@ EXTERN void __kmpc_kernel_end_parallel() { // be changed incorrectly because of threads divergence. bool IsActiveParallelRegion = threadsInTeam != 1; DecParallelLevel(IsActiveParallelRegion, - IsActiveParallelRegion ? 0xFFFFFFFF : 1u); + IsActiveParallelRegion ? __kmpc_impl_all_lanes : 1u); } //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu index 347c556..cee3e5d 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu @@ -24,14 +24,14 @@ EXTERN void __kmpc_nvptx_end_reduce_nowait(int32_t global_tid) {} EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) { - return __kmpc_impl_shfl_down_sync(0xFFFFFFFF, val, delta, size); + return __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, val, delta, size); } EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) { uint32_t lo, hi; __kmpc_impl_unpack(val, lo, hi); - hi = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, hi, delta, size); - lo = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, lo, delta, size); + hi = __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, hi, delta, size); + lo = __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, lo, delta, size); return __kmpc_impl_pack(lo, hi); } @@ -82,7 +82,7 @@ int32_t __kmpc_nvptx_simd_reduce_nowait(int32_t global_tid, int32_t num_vars, kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct) { __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask(); - if (Liveness == 0xffffffff) { + if (Liveness == __kmpc_impl_all_lanes) { gpu_regular_warp_reduce(reduce_data, shflFct); return GetThreadIdInBlock() % WARPSIZE == 0; // Result on lane 0 of the simd warp. @@ -143,7 +143,7 @@ static int32_t nvptx_parallel_reduce_nowait( return BlockThreadId == 0; #else __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask(); - if (Liveness == 0xffffffff) // Full warp + if (Liveness == __kmpc_impl_all_lanes) // Full warp gpu_regular_warp_reduce(reduce_data, shflFct); else if (!(Liveness & (Liveness + 1))) // Partial warp but contiguous lanes gpu_irregular_warp_reduce(reduce_data, shflFct, @@ -318,7 +318,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars, // Reduce across warps to the warp master. __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask(); - if (Liveness == 0xffffffff) // Full warp + if (Liveness == __kmpc_impl_all_lanes) // Full warp gpu_regular_warp_reduce(reduce_data, shflFct); else // Partial warp but contiguous lanes gpu_irregular_warp_reduce(reduce_data, shflFct, diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h index 8008143..37a125d 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -27,6 +27,8 @@ INLINE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) { } typedef uint32_t __kmpc_impl_lanemask_t; +static const __kmpc_impl_lanemask_t __kmpc_impl_all_lanes = + UINT32_C(0xffffffff); INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() { __kmpc_impl_lanemask_t res; -- 2.7.4