#define LLVM_CLANG_BASIC_TARGETINFO_H
#include "clang/Basic/AddressSpaces.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Specifiers.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/VersionTuple.h"
#include <cassert>
unsigned char RegParmMax, SSERegParmMax;
TargetCXXABI TheCXXABI;
const LangASMap *AddrSpaceMap;
+ const unsigned *GridValues =
+ nullptr; // Array of target-specific GPU grid values that must be
+ // consistent between host RTL (plugin), device RTL, and clang.
mutable StringRef PlatformName;
mutable VersionTuple PlatformMinVersion;
return LangAS::Default;
}
+ /// Return a target-specific GPU grid value based on the GVIDX enum \param gv
+ unsigned getGridValue(llvm::omp::GVIDX gv) const {
+ assert(GridValues != nullptr && "GridValues not initialized");
+ return GridValues[gv];
+ }
+
/// Retrieve the name of the platform as it is used in the
/// availability attribute.
StringRef getPlatformName() const { return PlatformName; }
--- /dev/null
+//====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Provides definitions for Target specific Grid Values
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPENMP_GRIDVALUES_H
+#define LLVM_OPENMP_GRIDVALUES_H
+
+namespace llvm {
+
+namespace omp {
+
+/// \brief Defines various target-specific GPU grid values that must be
+/// consistent between host RTL (plugin), device RTL, and clang.
+/// We can change grid values for a "fat" binary so that different
+/// passes get the correct values when generating code for a
+/// multi-target binary. Both amdgcn and nvptx values are stored in
+/// this file. In the future, should there be differences between GPUs
+/// of the same architecture, then simply make a different array and
+/// use the new array name.
+///
+/// Example usage in clang:
+/// const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size);
+///
+/// Example usage in libomptarget/deviceRTLs:
+/// #include "OMPGridValues.h"
+/// #ifdef __AMDGPU__
+/// #define GRIDVAL AMDGPUGpuGridValues
+/// #else
+/// #define GRIDVAL NVPTXGpuGridValues
+/// #endif
+/// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
+/// GRIDVAL[GV_Warp_Size]
+///
+/// Example usage in libomptarget hsa plugin:
+/// #include "OMPGridValues.h"
+/// #define GRIDVAL AMDGPUGpuGridValues
+/// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
+/// GRIDVAL[GV_Warp_Size]
+///
+/// Example usage in libomptarget cuda plugin:
+/// #include "OMPGridValues.h"
+/// #define GRIDVAL NVPTXGpuGridValues
+/// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
+/// GRIDVAL[GV_Warp_Size]
+///
+enum GVIDX {
+ /// The maximum number of workers in a kernel.
+ /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
+ GV_Threads,
+ /// The size reserved for data in a shared memory slot.
+ GV_Slot_Size,
+ /// The default value of maximum number of threads in a worker warp.
+ GV_Warp_Size,
+ /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
+ /// for NVPTX.
+ GV_Warp_Size_32,
+ /// The number of bits required to represent the max number of threads in warp
+ GV_Warp_Size_Log2,
+ /// GV_Warp_Size * GV_Slot_Size,
+ GV_Warp_Slot_Size,
+ /// the maximum number of teams.
+ GV_Max_Teams,
+ /// Global Memory Alignment
+ GV_Mem_Align,
+ /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
+ GV_Warp_Size_Log2_Mask,
+ // An alternative to the heavy data sharing infrastructure that uses global
+ // memory is one that uses device __shared__ memory. The amount of such space
+ // (in bytes) reserved by the OpenMP runtime is noted here.
+ GV_SimpleBufferSize,
+ // The absolute maximum team size for a working group
+ GV_Max_WG_Size,
+ // The default maximum team size for a working group
+ GV_Default_WG_Size,
+ // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
+ GV_Max_Warp_Number,
+ /// The slot size that should be reserved for a working warp.
+ /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
+ GV_Warp_Size_Log2_MaskL
+};
+
+/// For AMDGPU GPUs
+static constexpr unsigned AMDGPUGpuGridValues[] = {
+ 448, // GV_Threads
+ 256, // GV_Slot_Size
+ 64, // GV_Warp_Size
+ 32, // GV_Warp_Size_32
+ 6, // GV_Warp_Size_Log2
+ 64 * 256, // GV_Warp_Slot_Size
+ 128, // GV_Max_Teams
+ 256, // GV_Mem_Align
+ 63, // GV_Warp_Size_Log2_Mask
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size,
+ 256, // GV_Defaut_WG_Size
+ 1024 / 64, // GV_Max_WG_Size / GV_WarpSize
+ 63 // GV_Warp_Size_Log2_MaskL
+};
+
+/// For Nvidia GPUs
+static constexpr unsigned NVPTXGpuGridValues[] = {
+ 992, // GV_Threads
+ 256, // GV_Slot_Size
+ 32, // GV_Warp_Size
+ 32, // GV_Warp_Size_32
+ 5, // GV_Warp_Size_Log2
+ 32 * 256, // GV_Warp_Slot_Size
+ 1024, // GV_Max_Teams
+ 256, // GV_Mem_Align
+ (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size
+ 128, // GV_Defaut_WG_Size
+ 1024 / 32, // GV_Max_WG_Size / GV_WarpSize
+ 31 // GV_Warp_Size_Log2_MaskL
+};
+
+} // namespace omp
+} // namespace llvm
+
+#endif // LLVM_OPENMP_GRIDVALUES_H