Summary:
This reverts commit
dd8a7fcdd73dd63529b81bf9f72c7529dfe99ec3.
Alexey reports undefined symbols for the new inline functions defined in target_impl.h
This does not reproduce for me for nvptx, or amdgcn, under release or debug builds.
I believe the patch is fine, based on:
- the semantics of an inline function in C++ (the cuda INLINE functions end
up as linkonce_odr in IR), which are only legal to drop if they have no uses
- the code generated from a debug build of clang 9 does not show these undef symbols
- the tests pass
- the code is trivial
To progress from here I either need:
- A tie break - someone to play the role of CI in determining whether the patch works
- Alexey to provide sufficient information about his build for me to reproduce the failure
- Alexey to debug why the symbols are disappearing for him and report back
Reviewers: ABataev, jdoerfert, grokos
Subscribers: jvesely, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D71502
EXTERN void __kmpc_impl_threadfence_block(void);
EXTERN void __kmpc_impl_threadfence_system(void);
+// Calls to the AMDGCN layer (assuming 1D layout)
+EXTERN uint64_t __ockl_get_local_size(uint32_t);
+EXTERN uint64_t __ockl_get_num_groups(uint32_t);
+INLINE int GetThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
+INLINE int GetBlockIdInKernel() { return __builtin_amdgcn_workgroup_id_x(); }
+INLINE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); }
+INLINE int GetNumberOfThreadsInBlock() { return __ockl_get_local_size(0); }
+
// DEVICE versions of part of libc
extern "C" {
DEVICE __attribute__((noreturn)) void
//
////////////////////////////////////////////////////////////////////////////////
-DEVICE int GetThreadIdInBlock() { return threadIdx.x; }
-
-DEVICE int GetBlockIdInKernel() { return blockIdx.x; }
-
-DEVICE int GetNumberOfBlocksInKernel() { return gridDim.x; }
-
-DEVICE int GetNumberOfThreadsInBlock() { return blockDim.x; }
-
DEVICE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
DEVICE unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); }
-//===--------- support.h - NVPTX OpenMP support functions -------- CUDA -*-===//
+//===--------- support.h - OpenMP GPU support functions ---------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
////////////////////////////////////////////////////////////////////////////////
// get low level ids of resources
-DEVICE int GetThreadIdInBlock();
-DEVICE int GetBlockIdInKernel();
-DEVICE int GetNumberOfBlocksInKernel();
-DEVICE int GetNumberOfThreadsInBlock();
DEVICE unsigned GetWarpId();
DEVICE unsigned GetLaneId();
INLINE void __kmpc_impl_threadfence_block(void) { __threadfence_block(); }
INLINE void __kmpc_impl_threadfence_system(void) { __threadfence_system(); }
+// Calls to the NVPTX layer (assuming 1D layout)
+INLINE int GetThreadIdInBlock() { return threadIdx.x; }
+INLINE int GetBlockIdInKernel() { return blockIdx.x; }
+INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; }
+INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
+
#endif