AMDGPU/GlobalISel: Don't hardcode maximum register size

author Matt Arsenault <Matthew.Arsenault@amd.com>

Sat, 13 Jun 2020 22:32:30 +0000 (18:32 -0400)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Mon, 15 Jun 2020 19:01:19 +0000 (15:01 -0400)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Sat, 13 Jun 2020 22:32:30 +0000 (18:32 -0400)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 15 Jun 2020 19:01:19 +0000 (15:01 -0400)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

index 2f701ca..0ee1a6b 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -44,6 +44,7 @@ static cl::opt<bool> EnableNewLegality(
    cl::init(false),
    cl::ReallyHidden);
  
+static constexpr unsigned MaxRegisterSize = 1024;
  
  // Round the number of elements to the next power of two elements
  static LLT getPow2VectorType(LLT Ty) {
@@ -153,7 +154,7 @@ static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
  }
  
  static bool isRegisterSize(unsigned Size) {
-  return Size % 32 == 0 && Size <= 1024;
+  return Size % 32 == 0 && Size <= MaxRegisterSize;
  }
  
  static bool isRegisterVectorElementType(LLT EltTy) {
@@ -178,8 +179,8 @@ static bool isRegisterType(LLT Ty) {
    return true;
  }
  
-// Any combination of 32 or 64-bit elements up to 1024 bits, and multiples of
-// v2s16.
+// Any combination of 32 or 64-bit elements up the maximum register size, and
+// multiples of v2s16.
  static LegalityPredicate isRegisterType(unsigned TypeIdx) {
    return [=](const LegalityQuery &Query) {
      return isRegisterType(Query.Types[TypeIdx]);
@@ -334,7 +335,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
    const LLT S128 = LLT::scalar(128);
    const LLT S256 = LLT::scalar(256);
    const LLT S512 = LLT::scalar(512);
-  const LLT S1024 = LLT::scalar(1024);
+  const LLT MaxScalar = LLT::scalar(MaxRegisterSize);
  
    const LLT V2S16 = LLT::vector(2, 16);
    const LLT V4S16 = LLT::vector(4, 16);
@@ -492,7 +493,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
        // them, but don't really occupy registers in the normal way.
        .legalFor({S1, S16})
        .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
-      .clampScalarOrElt(0, S32, S1024)
+      .clampScalarOrElt(0, S32, MaxScalar)
        .widenScalarToNextPow2(0, 32)
        .clampMaxNumElements(0, S32, 16);
  
@@ -1228,7 +1229,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
            return (EltTy.getSizeInBits() == 16 ||
                    EltTy.getSizeInBits() % 32 == 0) &&
                   VecTy.getSizeInBits() % 32 == 0 &&
-                 VecTy.getSizeInBits() <= 1024 &&
+                 VecTy.getSizeInBits() <= MaxRegisterSize &&
                   IdxTy.getSizeInBits() == 32;
          })
        .clampScalar(EltTypeIdx, S32, S64)
@@ -1358,7 +1359,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
        .fewerElementsIf(
          [=](const LegalityQuery &Query) { return notValidElt(Query, BigTyIdx); },
          scalarize(1))
-      .clampScalar(BigTyIdx, S32, S1024);
+      .clampScalar(BigTyIdx, S32, MaxScalar);
  
      if (Op == G_MERGE_VALUES) {
        Builder.widenScalarIf(
@@ -1399,7 +1400,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
  
            return BigTy.getSizeInBits() % 16 == 0 &&
                   LitTy.getSizeInBits() % 16 == 0 &&
-                 BigTy.getSizeInBits() <= 1024;
+                 BigTy.getSizeInBits() <= MaxRegisterSize;
          })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Sat, 13 Jun 2020 22:32:30 +0000 (18:32 -0400)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Mon, 15 Jun 2020 19:01:19 +0000 (15:01 -0400)