From: Jon Chesterfield Date: Fri, 20 Aug 2021 15:41:25 +0000 (+0100) Subject: [openmp][nfc] Refactor GridValues X-Git-Tag: upstream/15.0.7~33374 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2a47a84b40115b01e03e4d89c1d47ba74beb7bf3;p=platform%2Fupstream%2Fllvm.git [openmp][nfc] Refactor GridValues Remove redundant fields and replace pointer with virtual function Of fourteen fields, three are dead and four can be computed from the remainder. This leaves a couple of currently dead fields in place as they are expected to be used from the deviceRTL shortly. Two of the fields that can be computed are only used from codegen and require a log2() implementation so are inlined into codegen instead. This change leaves the new methods in the same location in the struct as the previous fields for convenience at review. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D108380 --- diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index ab85594..fe6f67d 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -210,9 +210,6 @@ protected: unsigned char RegParmMax, SSERegParmMax; TargetCXXABI TheCXXABI; const LangASMap *AddrSpaceMap; - const llvm::omp::GV *GridValues = - nullptr; // target-specific GPU grid values that must be - // consistent between host RTL (plugin), device RTL, and clang. mutable StringRef PlatformName; mutable VersionTuple PlatformMinVersion; @@ -1410,10 +1407,10 @@ public: return LangAS::Default; } - /// Return a target-specific GPU grid values - const llvm::omp::GV &getGridValue() const { - assert(GridValues != nullptr && "GridValues not initialized"); - return *GridValues; + // access target-specific GPU grid values that must be consistent between + // host RTL (plugin), deviceRTL and clang. 
+ virtual const llvm::omp::GV &getGridValue() const { + llvm_unreachable("getGridValue not implemented on this target"); } /// Retrieve the name of the platform as it is used in the diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index cebb19e..ba7ffa3 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -17,7 +17,6 @@ #include "clang/Basic/MacroBuilder.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Frontend/OpenMP/OMPGridValues.h" using namespace clang; using namespace clang::targets; @@ -335,7 +334,6 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, llvm::AMDGPU::getArchAttrR600(GPUKind)) { resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN : DataLayoutStringR600); - GridValues = &llvm::omp::AMDGPUGridValues; setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || !isAMDGCN(Triple)); diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 77c2c5f..e791a83 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -370,6 +370,10 @@ public: return getLangASFromTargetAS(Constant); } + const llvm::omp::GV &getGridValue() const override { + return llvm::omp::AMDGPUGridValues; + } + /// \returns Target specific vtbl ptr address space. 
unsigned getVtblPtrAddressSpace() const override { return static_cast<unsigned>(Constant); diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index d1a34e4..c245753 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -16,7 +16,6 @@ #include "clang/Basic/MacroBuilder.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Frontend/OpenMP/OMPGridValues.h" using namespace clang; using namespace clang::targets; @@ -65,7 +64,6 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, TLSSupported = false; VLASupported = false; AddrSpaceMap = &NVPTXAddrSpaceMap; - GridValues = &llvm::omp::NVPTXGridValues; UseAddrSpaceMapMangling = true; // Define available target features diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index c7db3cd..ef751b8 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -147,6 +147,10 @@ public: Opts["cl_khr_local_int32_extended_atomics"] = true; } + const llvm::omp::GV &getGridValue() const override { + return llvm::omp::NVPTXGridValues; + } + /// \returns If a target requires an address within a target specific address /// space \p AddressSpace to be converted in order to be used, then return the /// corresponding target specific DWARF address space. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index b13d559..0e392c2 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" #include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/Support/MathExtras.h" using namespace clang; using namespace CodeGen; @@ -106,8 +107,7 @@ public: /// is the same for all known NVPTX architectures. 
enum MachineConfiguration : unsigned { /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target - /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2, - /// and GV_Warp_Size_Log2_Mask. + /// specific Grid Values like GV_Warp_Size, GV_Slot_Size /// Global memory alignment for performance. GlobalMemoryAlignment = 128, @@ -535,7 +535,8 @@ public: /// on the NVPTX device, to generate more efficient code. static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; - unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2; + unsigned LaneIDBits = + llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size); auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id"); } @@ -545,8 +546,9 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { /// on the NVPTX device, to generate more efficient code. static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; - unsigned LaneIDMask = - CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask; + unsigned LaneIDBits = + llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size); + unsigned LaneIDMask = ~0u >> (32u - LaneIDBits); auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask), "nvptx_lane_id"); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h index 1d7735e..2130b9b 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h @@ -62,19 +62,13 @@ struct GV { const unsigned GV_Slot_Size; /// The default value of maximum number of threads in a worker warp. const unsigned GV_Warp_Size; - /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size - /// for NVPTX. 
- const unsigned GV_Warp_Size_32; - /// The number of bits required to represent the max number of threads in warp - const unsigned GV_Warp_Size_Log2; - /// GV_Warp_Size * GV_Slot_Size, - const unsigned GV_Warp_Slot_Size; + + constexpr unsigned warpSlotSize() const { + return GV_Warp_Size * GV_Slot_Size; + } + /// the maximum number of teams. const unsigned GV_Max_Teams; - /// Global Memory Alignment - const unsigned GV_Mem_Align; - /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) - const unsigned GV_Warp_Size_Log2_Mask; // An alternative to the heavy data sharing infrastructure that uses global // memory is one that uses device __shared__ memory. The amount of such space // (in bytes) reserved by the OpenMP runtime is noted here. @@ -83,47 +77,32 @@ struct GV { const unsigned GV_Max_WG_Size; // The default maximum team size for a working group const unsigned GV_Default_WG_Size; - // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN. - const unsigned GV_Max_Warp_Number; - /// The slot size that should be reserved for a working warp. 
- /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) - const unsigned GV_Warp_Size_Log2_MaskL; + + constexpr unsigned maxWarpNumber() const { + return GV_Max_WG_Size / GV_Warp_Size; + } }; /// For AMDGPU GPUs static constexpr GV AMDGPUGridValues = { - 448, // GV_Threads - 256, // GV_Slot_Size - 64, // GV_Warp_Size - 32, // GV_Warp_Size_32 - 6, // GV_Warp_Size_Log2 - 64 * 256, // GV_Warp_Slot_Size - 128, // GV_Max_Teams - 256, // GV_Mem_Align - 63, // GV_Warp_Size_Log2_Mask - 896, // GV_SimpleBufferSize - 1024, // GV_Max_WG_Size, - 256, // GV_Defaut_WG_Size - 1024 / 64, // GV_Max_WG_Size / GV_WarpSize - 63 // GV_Warp_Size_Log2_MaskL + 448, // GV_Threads + 256, // GV_Slot_Size + 64, // GV_Warp_Size + 128, // GV_Max_Teams + 896, // GV_SimpleBufferSize + 1024, // GV_Max_WG_Size, + 256, // GV_Default_WG_Size }; /// For Nvidia GPUs static constexpr GV NVPTXGridValues = { - 992, // GV_Threads - 256, // GV_Slot_Size - 32, // GV_Warp_Size - 32, // GV_Warp_Size_32 - 5, // GV_Warp_Size_Log2 - 32 * 256, // GV_Warp_Slot_Size - 1024, // GV_Max_Teams - 256, // GV_Mem_Align - (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask - 896, // GV_SimpleBufferSize - 1024, // GV_Max_WG_Size - 128, // GV_Defaut_WG_Size - 1024 / 32, // GV_Max_WG_Size / GV_WarpSize - 31 // GV_Warp_Size_Log2_MaskL + 992, // GV_Threads + 256, // GV_Slot_Size + 32, // GV_Warp_Size + 1024, // GV_Max_Teams + 896, // GV_SimpleBufferSize + 1024, // GV_Max_WG_Size + 128, // GV_Default_WG_Size }; } // namespace omp