unsigned char RegParmMax, SSERegParmMax;
TargetCXXABI TheCXXABI;
const LangASMap *AddrSpaceMap;
- const unsigned *GridValues =
- nullptr; // Array of target-specific GPU grid values that must be
+ const llvm::omp::GV *GridValues =
+ nullptr; // Target-specific GPU grid values that must be
// consistent between host RTL (plugin), device RTL, and clang.
mutable StringRef PlatformName;
return LangAS::Default;
}
- /// Return a target-specific GPU grid value based on the GVIDX enum \p gv
- unsigned getGridValue(llvm::omp::GVIDX gv) const {
+ /// Return the target-specific GPU grid values.
+ const llvm::omp::GV &getGridValue() const {
assert(GridValues != nullptr && "GridValues not initialized");
- return GridValues[gv];
+ return *GridValues;
}
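// A minimal usage sketch (illustrative only, not part of this patch; `TI`
// stands in for any clang::TargetInfo reference). With the struct-based API,
// each grid value is read as a field of the returned llvm::omp::GV:
//   unsigned WarpSize = TI.getGridValue().GV_Warp_Size;
//   unsigned SlotSize = TI.getGridValue().GV_Slot_Size;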
/// Retrieve the name of the platform as it is used in the
llvm::AMDGPU::getArchAttrR600(GPUKind)) {
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
: DataLayoutStringR600);
- GridValues = llvm::omp::AMDGPUGpuGridValues;
+ GridValues = &llvm::omp::AMDGPUGridValues;
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
!isAMDGCN(Triple));
TLSSupported = false;
VLASupported = false;
AddrSpaceMap = &NVPTXAddrSpaceMap;
- GridValues = llvm::omp::NVPTXGpuGridValues;
+ GridValues = &llvm::omp::NVPTXGridValues;
UseAddrSpaceMapMangling = true;
// Define available target features
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Cuda.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
using namespace clang;
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
// Return the constant, compile-time target-specific warp size.
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
return Bld.getInt32(WarpSize);
}
assert(!GlobalizedRD &&
"Record for globalized variables is built already.");
ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
if (IsInTTDRegion)
EscapedDeclsForTeams = EscapedDecls.getArrayRef();
else
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
- unsigned LaneIDBits =
- CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size_Log2);
+ unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
}
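// Illustrative arithmetic check (not part of this patch): with warp size 32,
// GV_Warp_Size_Log2 == 5, so thread ID 70 >> 5 == 2; thread 70 sits in warp 2.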
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
- unsigned LaneIDMask = CGF.getContext().getTargetInfo().getGridValue(
- llvm::omp::GV_Warp_Size_Log2_Mask);
+ unsigned LaneIDMask =
+ CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
"nvptx_lane_id");
const RecordDecl *GlobalizedRD = nullptr;
llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
- unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGM.getTarget().getGridValue().GV_Warp_Size;
// Globalize team reductions variable unconditionally in all modes.
if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
"__openmp_nvptx_data_transfer_temporary_storage";
llvm::GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
if (!TransferMedium) {
auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/AST/StmtOpenMP.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
namespace clang {
namespace CodeGen {
///
/// Example usage in clang:
/// const unsigned warp_size =
-/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
+///   ctx.getTargetInfo().getGridValue().GV_Warp_Size;
///
/// Example usage in libomptarget/deviceRTLs:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
/// #ifdef __AMDGPU__
-/// #define GRIDVAL AMDGPUGpuGridValues
+/// #define GRIDVAL AMDGPUGridValues
/// #else
-/// #define GRIDVAL NVPTXGpuGridValues
+/// #define GRIDVAL NVPTXGridValues
/// #endif
/// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL.GV_Warp_Size
///
/// Example usage in libomptarget hsa plugin:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-/// #define GRIDVAL AMDGPUGpuGridValues
+/// #define GRIDVAL AMDGPUGridValues
/// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL.GV_Warp_Size
///
/// Example usage in libomptarget cuda plugin:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-/// #define GRIDVAL NVPTXGpuGridValues
+/// #define GRIDVAL NVPTXGridValues
/// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL.GV_Warp_Size
///
-enum GVIDX {
+
+struct GV {
/// The maximum number of workers in a kernel.
/// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size); might be an issue for blockDim.z.
- GV_Threads,
+ const unsigned GV_Threads;
/// The size reserved for data in a shared memory slot.
- GV_Slot_Size,
+ const unsigned GV_Slot_Size;
/// The default value of maximum number of threads in a worker warp.
- GV_Warp_Size,
+ const unsigned GV_Warp_Size;
/// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
/// for NVPTX.
- GV_Warp_Size_32,
+ const unsigned GV_Warp_Size_32;
/// The number of bits required to represent the max number of threads in a warp.
- GV_Warp_Size_Log2,
+ const unsigned GV_Warp_Size_Log2;
/// The slot size that should be reserved for a working warp: GV_Warp_Size * GV_Slot_Size.
- GV_Warp_Slot_Size,
+ const unsigned GV_Warp_Slot_Size;
/// The maximum number of teams.
- GV_Max_Teams,
+ const unsigned GV_Max_Teams;
/// Global memory alignment.
- GV_Mem_Align,
+ const unsigned GV_Mem_Align;
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
- GV_Warp_Size_Log2_Mask,
+ const unsigned GV_Warp_Size_Log2_Mask;
// An alternative to the heavy data sharing infrastructure that uses global
// memory is one that uses device __shared__ memory. The amount of such space
// (in bytes) reserved by the OpenMP runtime is noted here.
- GV_SimpleBufferSize,
+ const unsigned GV_SimpleBufferSize;
// The absolute maximum team size for a working group
- GV_Max_WG_Size,
+ const unsigned GV_Max_WG_Size;
// The default maximum team size for a working group
- GV_Default_WG_Size,
+ const unsigned GV_Default_WG_Size;
// This is GV_Max_WG_Size / GV_Warp_Size, i.e. 32 for NVPTX and 16 for AMDGCN.
- GV_Max_Warp_Number,
+ const unsigned GV_Max_Warp_Number;
/// The bitmask of the lane ID within a warp; same value as GV_Warp_Size_Log2_Mask.
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
- GV_Warp_Size_Log2_MaskL
+ const unsigned GV_Warp_Size_Log2_MaskL;
};
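// Worked check of the mask formula above (illustrative, not part of this
// patch): for GV_Warp_Size == 32 and GV_Warp_Size_Log2 == 5,
//   ~0u >> (32 - 5) == 0xFFFFFFFFu >> 27 == 0x1Fu == 31,
// exactly the low 5 bits that select a lane within a warp.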
/// For AMDGPU GPUs
-static constexpr unsigned AMDGPUGpuGridValues[] = {
+static constexpr GV AMDGPUGridValues = {
448, // GV_Threads
256, // GV_Slot_Size
64, // GV_Warp_Size
};
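// Note: GV instances rely on positional aggregate initialization, so the
// values must appear in exact field-declaration order; the trailing
// per-field comments help guard against misordering.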
/// For Nvidia GPUs
-static constexpr unsigned NVPTXGpuGridValues[] = {
+static constexpr GV NVPTXGridValues = {
992, // GV_Threads
256, // GV_Slot_Size
32, // GV_Warp_Size
static const unsigned HardTeamLimit =
      (1 << 16) - 1; // 65535: the team count must fit in a uint16
static const int DefaultNumTeams = 128;
- static const int Max_Teams =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Max_Teams];
- static const int Warp_Size =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size];
- static const int Max_WG_Size =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Max_WG_Size];
+ static const int Max_Teams = llvm::omp::AMDGPUGridValues.GV_Max_Teams;
+ static const int Warp_Size = llvm::omp::AMDGPUGridValues.GV_Warp_Size;
+ static const int Max_WG_Size = llvm::omp::AMDGPUGridValues.GV_Max_WG_Size;
static const int Default_WG_Size =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Default_WG_Size];
+ llvm::omp::AMDGPUGridValues.GV_Default_WG_Size;
using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, const void *,
size_t size, hsa_agent_t);
DeviceInfo.WarpSize[device_id] = wavefront_size;
} else {
DP("Default wavefront size: %d\n",
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size]);
- DeviceInfo.WarpSize[device_id] =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size];
+ llvm::omp::AMDGPUGridValues.GV_Warp_Size);
+ DeviceInfo.WarpSize[device_id] = llvm::omp::AMDGPUGridValues.GV_Warp_Size;
}
// Adjust teams to the env variables