return __ballot64(1);
}
-EXTERN bool __kmpc_impl_is_first_active_thread();
-
EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
int32_t SrcLane);
INLINE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); }
INLINE int GetNumberOfThreadsInBlock() { return __ockl_get_local_size(0); }
+EXTERN bool __kmpc_impl_is_first_active_thread();
+
// Locks
EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock);
EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock);
#endif
}
-// Return true if this is the first active thread in the warp.
-INLINE bool __kmpc_impl_is_first_active_thread() {
- unsigned long long Mask = __kmpc_impl_activemask();
- unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
- unsigned long long Sh = Mask << ShNum;
- // Truncate Sh to the 32 lower bits
- return (unsigned)Sh == 0;
-}
-
// In Cuda 9.0, the *_sync() version takes an extra argument 'mask'.
INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var,
INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; }
INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
+// Return true if this is the first active thread in the warp.
+INLINE bool __kmpc_impl_is_first_active_thread() {
+ unsigned long long Mask = __kmpc_impl_activemask();
+ unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
+ unsigned long long Sh = Mask << ShNum;
+ // Truncate Sh to the 32 lower bits
+ return (unsigned)Sh == 0;
+}
+
// Locks
EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock);
EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock);