Revert "[PGO][PGSO] Add profile guided size optimization to loop vectorization legality."

author Arthur Eubanks <aeubanks@google.com>
Fri, 17 Jul 2020 23:47:41 +0000 (16:47 -0700)
committer Arthur Eubanks <aeubanks@google.com>
Fri, 17 Jul 2020 23:47:41 +0000 (16:47 -0700)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

index 7235aa5861120d1320ae8b05348f4e29f6fa850f..c6c3450f77608a30f3dcb3508477a5e054f37e8f 100644 (file)
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -202,10 +202,9 @@ public:
        Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
        LoopInfo *LI, OptimizationRemarkEmitter *ORE,
        LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
-      AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
+      AssumptionCache *AC)
        : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
-        GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC),
-        BFI(BFI), PSI(PSI) {}
+        GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
  
    /// ReductionList contains the reduction descriptors for all
    /// of the reductions that were found in the loop.
@@ -479,10 +478,6 @@ private:
    /// Assume instructions in predicated blocks must be dropped if the CFG gets
    /// flattened.
    SmallPtrSet<Instruction *, 8> ConditionalAssumes;
-
-  /// BFI and PSI are used to check for profile guided size optimizations.
-  BlockFrequencyInfo *BFI;
-  ProfileSummaryInfo *PSI;
  };
  
  } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

index 120b544808bed40a032dbf8270c6025c5ab185da..23613775d896d10e59f32385087a28ea86558645 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -20,7 +20,6 @@
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/PatternMatch.h"
-#include "llvm/Transforms/Utils/SizeOpts.h"
  #include "llvm/Transforms/Vectorize/LoopVectorize.h"
  
  using namespace llvm;
@@ -413,11 +412,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
    const ValueToValueMap &Strides =
        getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
  
-  Function *F = TheLoop->getHeader()->getParent();
-  bool OptForSize = F->hasOptSize() ||
-                    llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
-                                                PGSOQueryType::IRPass);
-  bool CanAddPredicate = !OptForSize;
+  bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();
    int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
    if (Stride == 1 || Stride == -1)
      return Stride;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index e9e0ccb8b2ee7dba575a2c6a6e116848e9d3e3bb..7f13a689cedbd1c5e45194c73b379c5283987e06 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -395,13 +395,11 @@ public:
                        const TargetTransformInfo *TTI, AssumptionCache *AC,
                        OptimizationRemarkEmitter *ORE, unsigned VecWidth,
                        unsigned UnrollFactor, LoopVectorizationLegality *LVL,
-                      LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
-                      ProfileSummaryInfo *PSI)
+                      LoopVectorizationCostModel *CM)
        : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
          AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
          Builder(PSE.getSE()->getContext()),
-        VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM),
-        BFI(BFI), PSI(PSI) {}
+        VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM) {}
    virtual ~InnerLoopVectorizer() = default;
  
    /// Create a new empty loop. Unlink the old loop and connect the new one.
@@ -781,10 +779,6 @@ protected:
    // Vector of original scalar PHIs whose corresponding widened PHIs need to be
    // fixed up at the end of vector code generation.
    SmallVector<PHINode *, 8> OrigPHIsToFix;
-
-  /// BFI and PSI are used to check for profile guided size optimizations.
-  BlockFrequencyInfo *BFI;
-  ProfileSummaryInfo *PSI;
  };
  
  class InnerLoopUnroller : public InnerLoopVectorizer {
@@ -795,10 +789,9 @@ public:
                      const TargetTransformInfo *TTI, AssumptionCache *AC,
                      OptimizationRemarkEmitter *ORE, unsigned UnrollFactor,
                      LoopVectorizationLegality *LVL,
-                    LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
-                    ProfileSummaryInfo *PSI)
+                    LoopVectorizationCostModel *CM)
        : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1,
-                            UnrollFactor, LVL, CM, BFI, PSI) {}
+                            UnrollFactor, LVL, CM) {}
  
  private:
    Value *getBroadcastInstrs(Value *V) override;
@@ -2761,9 +2754,7 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
      if (C->isZero())
        return;
  
-  assert(!(SCEVCheckBlock->getParent()->hasOptSize() ||
-           llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
-                                       PGSOQueryType::IRPass)) &&
+  assert(!SCEVCheckBlock->getParent()->hasOptSize() &&
           "Cannot SCEV check stride or overflow when optimizing for size");
  
    SCEVCheckBlock->setName("vector.scevcheck");
@@ -2809,9 +2800,7 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
    assert(MemRuntimeCheck && "no RT checks generated although RtPtrChecking "
                              "claimed checks are required");
  
-  if (MemCheckBlock->getParent()->hasOptSize() ||
-      llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
-                                  PGSOQueryType::IRPass)) {
+  if (MemCheckBlock->getParent()->hasOptSize()) {
      assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled &&
             "Cannot emit memory checks when optimizing for size, unless forced "
             "to vectorize.");
@@ -7740,7 +7729,7 @@ static bool processLoopInVPlanNativePath(
    LVP.setBestPlan(VF.Width, 1);
  
    InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
-                         &CM, BFI, PSI);
+                         &CM);
    LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
                      << L->getHeader()->getParent()->getName() << "\"\n");
    LVP.executePlan(LB, DT);
@@ -7804,7 +7793,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
    // Check if it is legal to vectorize the loop.
    LoopVectorizationRequirements Requirements(*ORE);
    LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, GetLAA, LI, ORE,
-                                &Requirements, &Hints, DB, AC, BFI, PSI);
+                                &Requirements, &Hints, DB, AC);
    if (!LVL.canVectorize(EnableVPlanNativePath)) {
      LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
      Hints.emitRemarkWithHints();
@@ -8004,8 +7993,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
      assert(IC > 1 && "interleave count should not be 1 or 0");
      // If we decided that it is not legal to vectorize the loop, then
      // interleave it.
-    InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM,
-                               BFI, PSI);
+    InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
+                               &CM);
      LVP.executePlan(Unroller, DT);
  
      ORE->emit([&]() {
@@ -8017,7 +8006,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
    } else {
      // If we decided that it is *legal* to vectorize the loop, then do it.
      InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
-                           &LVL, &CM, BFI, PSI);
+                           &LVL, &CM);
      LVP.executePlan(LB, DT);
      ++LoopsVectorized;
  
diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll

index b4233e6751cbc047ff3f641f8021675d4131ff96..0e88f362746fba645c41a2a07eee666f68d7c872 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/optsize.ll
@@ -121,38 +121,6 @@ for.body29:
    br i1 %cmp26, label %for.body29, label %for.cond.cleanup28
  }
  
-define void @pr43371_pgso() !prof !14 {
-;
-; CHECK-LABEL: @pr43371_pgso
-; CHECK-NOT:   vector.scevcheck
-;
-; We do not want to generate SCEV predicates when optimising for size, because
-; that will lead to extra code generation such as the SCEV overflow runtime
-; checks. Not generating SCEV predicates can still result in vectorisation as
-; the non-consecutive loads/stores can be scalarized:
-;
-; CHECK: vector.body:
-; CHECK: store i16 0, i16* %{{.*}}, align 1
-; CHECK: store i16 0, i16* %{{.*}}, align 1
-; CHECK: br i1 {{.*}}, label %vector.body
-;
-entry:
-  br label %for.body29
-
-for.cond.cleanup28:
-  unreachable
-
-for.body29:
-  %i24.0170 = phi i16 [ 0, %entry], [ %inc37, %for.body29]
-  %add33 = add i16 undef, %i24.0170
-  %idxprom34 = zext i16 %add33 to i32
-  %arrayidx35 = getelementptr [2592 x i16], [2592 x i16] * @cm_array, i32 0, i32 %idxprom34
-  store i16 0, i16 * %arrayidx35, align 1
-  %inc37 = add i16 %i24.0170, 1
-  %cmp26 = icmp ult i16 %inc37, 756
-  br i1 %cmp26, label %for.body29, label %for.cond.cleanup28
-}
-
  ; PR45526: don't vectorize with fold-tail if first-order-recurrence is live-out.
  ;
  define i32 @pr45526() optsize {
@@ -186,37 +154,6 @@ exit:
    ret i32 %for
  }
  
-define i32 @pr45526_pgso() !prof !14 {
-;
-; CHECK-LABEL: @pr45526_pgso
-; CHECK-NEXT: entry:
-; CHECK-NEXT:   br label %loop
-; CHECK-EMPTY:
-; CHECK-NEXT: loop:
-; CHECK-NEXT:   %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ]
-; CHECK-NEXT:   %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ]
-; CHECK-NEXT:   %pivPlus1 = add nuw nsw i32 %piv, 1
-; CHECK-NEXT:   %cond = icmp ult i32 %piv, 510
-; CHECK-NEXT:   br i1 %cond, label %loop, label %exit
-; CHECK-EMPTY:
-; CHECK-NEXT: exit:
-; CHECK-NEXT:   %for.lcssa = phi i32 [ %for, %loop ]
-; CHECK-NEXT:   ret i32 %for.lcssa
-;
-entry:
-  br label %loop
-
-loop:
-  %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ]
-  %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ]
-  %pivPlus1 = add nuw nsw i32 %piv, 1
-  %cond = icmp ult i32 %piv, 510
-  br i1 %cond, label %loop, label %exit
-
-exit:
-  ret i32 %for
-}
-
  ; PR46228: Vectorize w/o versioning for unit stride under optsize and enabled
  ; vectorization.
  
@@ -253,7 +190,7 @@ define void @stride1(i16* noalias %B, i32 %BStride) optsize {
  ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
  ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
  ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026
-; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !21
+; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !19
  ; CHECK:       middle.block:
  ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
  ; CHECK:       scalar.ph:
author	Arthur Eubanks <aeubanks@google.com>
	Fri, 17 Jul 2020 23:47:41 +0000 (16:47 -0700)
committer	Arthur Eubanks <aeubanks@google.com>
	Fri, 17 Jul 2020 23:47:41 +0000 (16:47 -0700)
llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h		patch | blob | history
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp		patch | blob | history
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch | blob | history
llvm/test/Transforms/LoopVectorize/optsize.ll		patch | blob | history