unsigned char RegParmMax, SSERegParmMax;
TargetCXXABI TheCXXABI;
const LangASMap *AddrSpaceMap;
- const unsigned *GridValues =
- nullptr; // Array of target-specific GPU grid values that must be
+ const llvm::omp::GV *GridValues =
+ nullptr; // Target-specific GPU grid values that must be
// consistent between host RTL (plugin), device RTL, and clang.
mutable StringRef PlatformName;
return LangAS::Default;
}
- /// Return a target-specific GPU grid value based on the GVIDX enum \p gv
- unsigned getGridValue(llvm::omp::GVIDX gv) const {
+ /// Return the target-specific GPU grid values.
+ const llvm::omp::GV &getGridValue() const {
assert(GridValues != nullptr && "GridValues not initialized");
- return GridValues[gv];
+ return *GridValues;
}
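// A minimal usage sketch (illustrative only, not part of this patch; `TI`
// stands in for any clang::TargetInfo reference). With the struct-based API,
// each grid value is read as a field of the returned llvm::omp::GV:
//   unsigned WarpSize = TI.getGridValue().GV_Warp_Size;
//   unsigned SlotSize = TI.getGridValue().GV_Slot_Size;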
/// Retrieve the name of the platform as it is used in the
llvm::AMDGPU::getArchAttrR600(GPUKind)) {
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
: DataLayoutStringR600);
- GridValues = llvm::omp::AMDGPUGpuGridValues;
+ GridValues = &llvm::omp::AMDGPUGridValues;
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
!isAMDGCN(Triple));
TLSSupported = false;
VLASupported = false;
AddrSpaceMap = &NVPTXAddrSpaceMap;
- GridValues = llvm::omp::NVPTXGpuGridValues;
+ GridValues = &llvm::omp::NVPTXGridValues;
UseAddrSpaceMapMangling = true;
// Define available target features
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Cuda.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
using namespace clang;
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
// Return the constant, compile-time target-specific warp size.
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
return Bld.getInt32(WarpSize);
}
assert(!GlobalizedRD &&
"Record for globalized variables is built already.");
ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
if (IsInTTDRegion)
EscapedDeclsForTeams = EscapedDecls.getArrayRef();
else
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
- unsigned LaneIDBits =
- CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size_Log2);
+ unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
}
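// Illustrative arithmetic check (not part of this patch): with warp size 32,
// GV_Warp_Size_Log2 == 5, so thread ID 70 >> 5 == 2; thread 70 sits in warp 2.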
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
- unsigned LaneIDMask = CGF.getContext().getTargetInfo().getGridValue(
- llvm::omp::GV_Warp_Size_Log2_Mask);
+ unsigned LaneIDMask =
+ CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
"nvptx_lane_id");
const RecordDecl *GlobalizedRD = nullptr;
llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
- unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGM.getTarget().getGridValue().GV_Warp_Size;
// Globalize team reductions variable unconditionally in all modes.
if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
"__openmp_nvptx_data_transfer_temporary_storage";
llvm::GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
if (!TransferMedium) {
auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/AST/StmtOpenMP.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
namespace clang {
namespace CodeGen {
///
/// Example usage in clang:
/// const unsigned warp_size =
-/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
+///   ctx.getTargetInfo().getGridValue().GV_Warp_Size;
///
/// Example usage in libomptarget/deviceRTLs:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
/// #ifdef __AMDGPU__
-/// #define GRIDVAL AMDGPUGpuGridValues
+/// #define GRIDVAL AMDGPUGridValues
/// #else
-/// #define GRIDVAL NVPTXGpuGridValues
+/// #define GRIDVAL NVPTXGridValues
/// #endif
/// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL.GV_Warp_Size
///
/// Example usage in libomptarget hsa plugin:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-/// #define GRIDVAL AMDGPUGpuGridValues
+/// #define GRIDVAL AMDGPUGridValues
/// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL.GV_Warp_Size
///
/// Example usage in libomptarget cuda plugin:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-/// #define GRIDVAL NVPTXGpuGridValues
+/// #define GRIDVAL NVPTXGridValues
/// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL.GV_Warp_Size
///
-enum GVIDX {
+
+struct GV {
/// The maximum number of workers in a kernel.
/// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size); might be an issue for blockDim.z.
- GV_Threads,
+ const unsigned GV_Threads;
/// The size reserved for data in a shared memory slot.
- GV_Slot_Size,
+ const unsigned GV_Slot_Size;
/// The default value of maximum number of threads in a worker warp.
- GV_Warp_Size,
+ const unsigned GV_Warp_Size;
/// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
/// for NVPTX.
- GV_Warp_Size_32,
+ const unsigned GV_Warp_Size_32;
/// The number of bits required to represent the max number of threads in a warp.
- GV_Warp_Size_Log2,
+ const unsigned GV_Warp_Size_Log2;
/// The slot size that should be reserved for a working warp: GV_Warp_Size * GV_Slot_Size.
- GV_Warp_Slot_Size,
+ const unsigned GV_Warp_Slot_Size;
/// The maximum number of teams.
- GV_Max_Teams,
+ const unsigned GV_Max_Teams;
/// Global memory alignment.
- GV_Mem_Align,
+ const unsigned GV_Mem_Align;
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
- GV_Warp_Size_Log2_Mask,
+ const unsigned GV_Warp_Size_Log2_Mask;
// An alternative to the heavy data sharing infrastructure that uses global
// memory is one that uses device __shared__ memory. The amount of such space
// (in bytes) reserved by the OpenMP runtime is noted here.
- GV_SimpleBufferSize,
+ const unsigned GV_SimpleBufferSize;
// The absolute maximum team size for a working group
- GV_Max_WG_Size,
+ const unsigned GV_Max_WG_Size;
// The default maximum team size for a working group
- GV_Default_WG_Size,
+ const unsigned GV_Default_WG_Size;
// This is GV_Max_WG_Size / GV_Warp_Size, i.e. 32 for NVPTX and 16 for AMDGCN.
- GV_Max_Warp_Number,
+ const unsigned GV_Max_Warp_Number;
/// The bitmask of the lane ID within a warp; same value as GV_Warp_Size_Log2_Mask.
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
- GV_Warp_Size_Log2_MaskL
+ const unsigned GV_Warp_Size_Log2_MaskL;
};
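// Worked check of the mask formula above (illustrative, not part of this
// patch): for GV_Warp_Size == 32 and GV_Warp_Size_Log2 == 5,
//   ~0u >> (32 - 5) == 0xFFFFFFFFu >> 27 == 0x1Fu == 31,
// exactly the low 5 bits that select a lane within a warp.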
/// For AMDGPU GPUs
-static constexpr unsigned AMDGPUGpuGridValues[] = {
+static constexpr GV AMDGPUGridValues = {
448, // GV_Threads
256, // GV_Slot_Size
64, // GV_Warp_Size
};
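// Note: GV instances rely on positional aggregate initialization, so the
// values must appear in exact field-declaration order; the trailing
// per-field comments help guard against misordering.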
/// For Nvidia GPUs
-static constexpr unsigned NVPTXGpuGridValues[] = {
+static constexpr GV NVPTXGridValues = {
992, // GV_Threads
256, // GV_Slot_Size
32, // GV_Warp_Size
static const unsigned HardTeamLimit =
      (1 << 16) - 1; // 65535: the team count must fit in a uint16
static const int DefaultNumTeams = 128;
- static const int Max_Teams =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Max_Teams];
- static const int Warp_Size =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size];
- static const int Max_WG_Size =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Max_WG_Size];
+ static const int Max_Teams = llvm::omp::AMDGPUGridValues.GV_Max_Teams;
+ static const int Warp_Size = llvm::omp::AMDGPUGridValues.GV_Warp_Size;
+ static const int Max_WG_Size = llvm::omp::AMDGPUGridValues.GV_Max_WG_Size;
static const int Default_WG_Size =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Default_WG_Size];
+ llvm::omp::AMDGPUGridValues.GV_Default_WG_Size;
using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, const void *,
size_t size, hsa_agent_t);
DeviceInfo.WarpSize[device_id] = wavefront_size;
} else {
DP("Default wavefront size: %d\n",
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size]);
- DeviceInfo.WarpSize[device_id] =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size];
+ llvm::omp::AMDGPUGridValues.GV_Warp_Size);
+ DeviceInfo.WarpSize[device_id] = llvm::omp::AMDGPUGridValues.GV_Warp_Size;
}
// Adjust teams to the env variables