MaxStoreLookup("slp-max-store-lookup", cl::init(32), cl::Hidden,
cl::desc("Maximum depth of the lookup for consecutive stores."));
-/// Limits the size of scheduling regions in a block.
-/// It avoid long compile times for _very_ large blocks where vector
-/// instructions are spread over a wide range.
-/// This limit is way higher than needed by real-world functions.
-static cl::opt<int>
-ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden,
- cl::desc("Limit the size of the SLP scheduling region per block"));
-
/// Command-line override for the smallest vector register width (in bits)
/// the SLP vectorizer will target; defaults to 128 (one NEON/SSE register).
/// Hidden: intended for testing/tuning, not end users.
static cl::opt<int> MinVectorRegSizeOption(
    "slp-min-reg-size", cl::init(128), cl::Hidden,
    cl::desc("Attempt to vectorize for this register size in bits"));
// This limit is useful for very large basic blocks.
// NOTE(review): presumably caps the instruction distance over which memory
// dependencies are checked during scheduling — confirm against the uses of
// MaxMemDepDistance in the full file before relying on this description.
static const unsigned MaxMemDepDistance = 160;
-/// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling
-/// regions to be handled.
-static const int MinScheduleRegionSize = 16;
-
/// Predicate for the element types that the SLP vectorizer supports.
///
/// The most important thing to filter here are types which are invalid in LLVM
FirstLoadStoreInRegion = nullptr;
LastLoadStoreInRegion = nullptr;
- // Reduce the maximum schedule region size by the size of the
- // previous scheduling run.
- ScheduleRegionSizeLimit -= ScheduleRegionSize;
- if (ScheduleRegionSizeLimit < MinScheduleRegionSize)
- ScheduleRegionSizeLimit = MinScheduleRegionSize;
- ScheduleRegionSize = 0;
-
// Make a new scheduling region, i.e. all existing ScheduleData is not
// in the new region yet.
++SchedulingRegionID;
/// Extends the scheduling region so that V is inside the region.
/// \returns true if the region size is within the limit.
- bool extendSchedulingRegion(Value *V, const InstructionsState &S);
+ void extendSchedulingRegion(Value *V, const InstructionsState &S);
/// Initialize the ScheduleData structures for new instructions in the
/// scheduling region.
/// (can be null).
ScheduleData *LastLoadStoreInRegion = nullptr;
- /// The current size of the scheduling region.
- int ScheduleRegionSize = 0;
-
- /// The maximum size allowed for the scheduling region.
- int ScheduleRegionSizeLimit = ScheduleRegionSizeBudget;
-
/// The ID of the scheduling region. For a new vectorization iteration this
/// is incremented which "removes" all ScheduleData from the region.
/// Make sure that the initial SchedulingRegionID is greater than the
doForAllOpcodes(I, [](ScheduleData *SD) { SD->clearDependencies(); });
ReSchedule = true;
}
- if (Bundle) {
- LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle
- << " in block " << BB->getName() << "\n");
- calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP);
- }
+ LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle
+ << " in block " << BB->getName() << "\n");
+ calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP);
if (ReSchedule) {
resetSchedule();
// dependencies. As soon as the bundle is "ready" it means that there are no
// cyclic dependencies and we can schedule it. Note that's important that we
// don't "schedule" the bundle yet (see cancelScheduling).
- while (((!Bundle && ReSchedule) || (Bundle && !Bundle->isReady())) &&
- !ReadyInsts.empty()) {
+ while (!Bundle->isReady() && !ReadyInsts.empty()) {
ScheduleData *Picked = ReadyInsts.pop_back_val();
assert(Picked->isSchedulingEntity() && Picked->isReady() &&
"must be ready to schedule");
// Make sure that the scheduling region contains all
// instructions of the bundle.
- for (Value *V : VL) {
- if (!extendSchedulingRegion(V, S)) {
- // If the scheduling region got new instructions at the lower end (or it
- // is a new region for the first bundle). This makes it necessary to
- // recalculate all dependencies.
- // Otherwise the compiler may crash trying to incorrectly calculate
- // dependencies and emit instruction in the wrong order at the actual
- // scheduling.
- TryScheduleBundleImpl(/*ReSchedule=*/false, nullptr);
- return None;
- }
- }
+ for (Value *V : VL)
+ extendSchedulingRegion(V, S);
bool ReSchedule = false;
for (Value *V : VL) {
return &(ScheduleDataChunks.back()[ChunkPos++]);
}
-bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
- const InstructionsState &S) {
+void
+BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
+ const InstructionsState &S) {
if (getScheduleData(V, isOneOf(S, V)))
- return true;
+ return;
Instruction *I = dyn_cast<Instruction>(V);
assert(I && "bundle member must be an instruction");
assert(!isa<PHINode>(I) && !isVectorLikeInstWithConstOps(I) &&
return true;
};
if (CheckSheduleForI(I))
- return true;
+ return;
if (!ScheduleStart) {
// It's the first instruction in the new region.
initScheduleData(I, I->getNextNode(), nullptr, nullptr);
CheckSheduleForI(I);
assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
- return true;
+ return;
}
// Search up and down at the same time, because we don't know if the new
// instruction is above or below the existing scheduling region.
BasicBlock::iterator LowerEnd = BB->end();
while (UpIter != UpperEnd && DownIter != LowerEnd && &*UpIter != I &&
&*DownIter != I) {
- if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
- LLVM_DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
- return false;
- }
-
++UpIter;
++DownIter;
}
CheckSheduleForI(I);
LLVM_DEBUG(dbgs() << "SLP: extend schedule region start to " << *I
<< "\n");
- return true;
+ return;
}
assert((UpIter == UpperEnd || (DownIter != LowerEnd && &*DownIter == I)) &&
"Expected to reach top of the basic block or instruction down the "
CheckSheduleForI(I);
assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
- return true;
+ return;
}
void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S | FileCheck %s --check-prefix=DEFAULT
-; RUN: opt < %s -slp-schedule-budget=0 -slp-min-tree-size=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=GATHER
-; RUN: opt < %s -slp-schedule-budget=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=MAX-COST
+; RUN: opt < %s -slp-min-tree-size=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=GATHER
+; RUN: opt < %s -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=MAX-COST
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
;
; MAX-COST-LABEL: @PR28330(
; MAX-COST-NEXT: entry:
-; MAX-COST-NEXT: [[P0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
-; MAX-COST-NEXT: [[P1:%.*]] = icmp eq i8 [[P0]], 0
-; MAX-COST-NEXT: [[P2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
-; MAX-COST-NEXT: [[P3:%.*]] = icmp eq i8 [[P2]], 0
-; MAX-COST-NEXT: [[P4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
-; MAX-COST-NEXT: [[P5:%.*]] = icmp eq i8 [[P4]], 0
-; MAX-COST-NEXT: [[P6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
-; MAX-COST-NEXT: [[P7:%.*]] = icmp eq i8 [[P6]], 0
-; MAX-COST-NEXT: [[P8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
-; MAX-COST-NEXT: [[P9:%.*]] = icmp eq i8 [[P8]], 0
-; MAX-COST-NEXT: [[P10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
-; MAX-COST-NEXT: [[P11:%.*]] = icmp eq i8 [[P10]], 0
-; MAX-COST-NEXT: [[P12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
-; MAX-COST-NEXT: [[P13:%.*]] = icmp eq i8 [[P12]], 0
-; MAX-COST-NEXT: [[P14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
-; MAX-COST-NEXT: [[P15:%.*]] = icmp eq i8 [[P14]], 0
+; MAX-COST-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
+; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
; MAX-COST: for.body:
-; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[P34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; MAX-COST-NEXT: [[P19:%.*]] = select i1 [[P1]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[P20:%.*]] = add i32 [[P17]], [[P19]]
-; MAX-COST-NEXT: [[P21:%.*]] = select i1 [[P3]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[P22:%.*]] = add i32 [[P20]], [[P21]]
-; MAX-COST-NEXT: [[P23:%.*]] = select i1 [[P5]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[P24:%.*]] = add i32 [[P22]], [[P23]]
-; MAX-COST-NEXT: [[P25:%.*]] = select i1 [[P7]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[P26:%.*]] = add i32 [[P24]], [[P25]]
-; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[P28:%.*]] = add i32 [[P26]], [[P27]]
-; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[P30:%.*]] = add i32 [[P28]], [[P29]]
-; MAX-COST-NEXT: [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[P32:%.*]] = add i32 [[P30]], [[P31]]
-; MAX-COST-NEXT: [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[P34]] = add i32 [[P32]], [[P33]]
+; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
+; MAX-COST-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
; MAX-COST-NEXT: br label [[FOR_BODY]]
;
entry:
;
; MAX-COST-LABEL: @PR32038(
; MAX-COST-NEXT: entry:
-; MAX-COST-NEXT: [[TMP0:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <4 x i8>*), align 1
-; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer
-; MAX-COST-NEXT: [[P8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
-; MAX-COST-NEXT: [[P9:%.*]] = icmp eq i8 [[P8]], 0
-; MAX-COST-NEXT: [[P10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
-; MAX-COST-NEXT: [[P11:%.*]] = icmp eq i8 [[P10]], 0
-; MAX-COST-NEXT: [[P12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
-; MAX-COST-NEXT: [[P13:%.*]] = icmp eq i8 [[P12]], 0
-; MAX-COST-NEXT: [[P14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
-; MAX-COST-NEXT: [[P15:%.*]] = icmp eq i8 [[P14]], 0
+; MAX-COST-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
+; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
; MAX-COST: for.body:
-; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[P34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; MAX-COST-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
-; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
-; MAX-COST-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], [[P27]]
-; MAX-COST-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], [[P29]]
-; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP5]], -5
-; MAX-COST-NEXT: [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[P32:%.*]] = add i32 [[OP_EXTRA]], [[P31]]
-; MAX-COST-NEXT: [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[P34]] = add i32 [[P32]], [[P33]]
+; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
+; MAX-COST-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5
; MAX-COST-NEXT: br label [[FOR_BODY]]
;
entry:
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 1
; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2
; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3
+; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
+; CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
+; CHECK-NEXT: [[B3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @unknown()
-; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
-; CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
-; CHECK-NEXT: [[B3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[B]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 1