From 0dfa4a83fad43f95c90dd67cb23f63baaa907bd1 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 17 Jul 2020 16:47:41 -0700 Subject: [PATCH] Revert "[PGO][PGSO] Add profile guided size optimization to loop vectorization legality." This reverts commit 30c382a7c6607a7d898730f8d288768110cdf1d2. See https://crbug.com/1106813. --- .../Vectorize/LoopVectorizationLegality.h | 9 +-- .../Vectorize/LoopVectorizationLegality.cpp | 7 +-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 33 ++++------- llvm/test/Transforms/LoopVectorize/optsize.ll | 65 +--------------------- 4 files changed, 15 insertions(+), 99 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 7235aa5..c6c3450 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -202,10 +202,9 @@ public: Function *F, std::function *GetLAA, LoopInfo *LI, OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, - AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI) + AssumptionCache *AC) : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), - GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), - BFI(BFI), PSI(PSI) {} + GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {} /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. @@ -479,10 +478,6 @@ private: /// Assume instructions in predicated blocks must be dropped if the CFG gets /// flattened. SmallPtrSet ConditionalAssumes; - - /// BFI and PSI are used to check for profile guided size optimizations. - BlockFrequencyInfo *BFI; - ProfileSummaryInfo *PSI; }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 120b544..2361377 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -20,7 +20,6 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Transforms/Utils/SizeOpts.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" using namespace llvm; @@ -413,11 +412,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); - Function *F = TheLoop->getHeader()->getParent(); - bool OptForSize = F->hasOptSize() || - llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI, - PGSOQueryType::IRPass); - bool CanAddPredicate = !OptForSize; + bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize(); int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false); if (Stride == 1 || Stride == -1) return Stride; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e9e0ccb..7f13a68 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -395,13 +395,11 @@ public: const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, unsigned VecWidth, unsigned UnrollFactor, LoopVectorizationLegality *LVL, - LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI) + LoopVectorizationCostModel *CM) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), - VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM), - BFI(BFI), PSI(PSI) {} + VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM) {} virtual ~InnerLoopVectorizer() = default; /// Create a new empty loop. Unlink the old loop and connect the new one. @@ -781,10 +779,6 @@ protected: // Vector of original scalar PHIs whose corresponding widened PHIs need to be // fixed up at the end of vector code generation. SmallVector OrigPHIsToFix; - - /// BFI and PSI are used to check for profile guided size optimizations. - BlockFrequencyInfo *BFI; - ProfileSummaryInfo *PSI; }; class InnerLoopUnroller : public InnerLoopVectorizer { @@ -795,10 +789,9 @@ public: const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, unsigned UnrollFactor, LoopVectorizationLegality *LVL, - LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI) + LoopVectorizationCostModel *CM) : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1, - UnrollFactor, LVL, CM, BFI, PSI) {} + UnrollFactor, LVL, CM) {} private: Value *getBroadcastInstrs(Value *V) override; @@ -2761,9 +2754,7 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { if (C->isZero()) return; - assert(!(SCEVCheckBlock->getParent()->hasOptSize() || - llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, - PGSOQueryType::IRPass)) && + assert(!SCEVCheckBlock->getParent()->hasOptSize() && "Cannot SCEV check stride or overflow when optimizing for size"); SCEVCheckBlock->setName("vector.scevcheck"); @@ -2809,9 +2800,7 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) { assert(MemRuntimeCheck && "no RT checks generated although RtPtrChecking " "claimed checks are required"); - if (MemCheckBlock->getParent()->hasOptSize() || - llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, - PGSOQueryType::IRPass)) { + if (MemCheckBlock->getParent()->hasOptSize()) { assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled && "Cannot emit memory checks when optimizing for size, unless forced " "to vectorize."); @@ -7740,7 +7729,7 @@ static bool processLoopInVPlanNativePath( LVP.setBestPlan(VF.Width, 1); InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL, - &CM, BFI, PSI); + &CM); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); LVP.executePlan(LB, DT); @@ -7804,7 +7793,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Check if it is legal to vectorize the loop. LoopVectorizationRequirements Requirements(*ORE); LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, GetLAA, LI, ORE, - &Requirements, &Hints, DB, AC, BFI, PSI); + &Requirements, &Hints, DB, AC); if (!LVL.canVectorize(EnableVPlanNativePath)) { LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); Hints.emitRemarkWithHints(); @@ -8004,8 +7993,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop, then // interleave it. - InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM, - BFI, PSI); + InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, + &CM); LVP.executePlan(Unroller, DT); ORE->emit([&]() { @@ -8017,7 +8006,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { } else { // If we decided that it is *legal* to vectorize the loop, then do it. InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, - &LVL, &CM, BFI, PSI); + &LVL, &CM); LVP.executePlan(LB, DT); ++LoopsVectorized; diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index b4233e6..0e88f36 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -121,38 +121,6 @@ for.body29: br i1 %cmp26, label %for.body29, label %for.cond.cleanup28 } -define void @pr43371_pgso() !prof !14 { -; -; CHECK-LABEL: @pr43371_pgso -; CHECK-NOT: vector.scevcheck -; -; We do not want to generate SCEV predicates when optimising for size, because -; that will lead to extra code generation such as the SCEV overflow runtime -; checks. Not generating SCEV predicates can still result in vectorisation as -; the non-consecutive loads/stores can be scalarized: -; -; CHECK: vector.body: -; CHECK: store i16 0, i16* %{{.*}}, align 1 -; CHECK: store i16 0, i16* %{{.*}}, align 1 -; CHECK: br i1 {{.*}}, label %vector.body -; -entry: - br label %for.body29 - -for.cond.cleanup28: - unreachable - -for.body29: - %i24.0170 = phi i16 [ 0, %entry], [ %inc37, %for.body29] - %add33 = add i16 undef, %i24.0170 - %idxprom34 = zext i16 %add33 to i32 - %arrayidx35 = getelementptr [2592 x i16], [2592 x i16] * @cm_array, i32 0, i32 %idxprom34 - store i16 0, i16 * %arrayidx35, align 1 - %inc37 = add i16 %i24.0170, 1 - %cmp26 = icmp ult i16 %inc37, 756 - br i1 %cmp26, label %for.body29, label %for.cond.cleanup28 -} - ; PR45526: don't vectorize with fold-tail if first-order-recurrence is live-out. ; define i32 @pr45526() optsize { @@ -186,37 +154,6 @@ exit: ret i32 %for } -define i32 @pr45526_pgso() !prof !14 { -; -; CHECK-LABEL: @pr45526_pgso -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop -; CHECK-EMPTY: -; CHECK-NEXT: loop: -; CHECK-NEXT: %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ] -; CHECK-NEXT: %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ] -; CHECK-NEXT: %pivPlus1 = add nuw nsw i32 %piv, 1 -; CHECK-NEXT: %cond = icmp ult i32 %piv, 510 -; CHECK-NEXT: br i1 %cond, label %loop, label %exit -; CHECK-EMPTY: -; CHECK-NEXT: exit: -; CHECK-NEXT: %for.lcssa = phi i32 [ %for, %loop ] -; CHECK-NEXT: ret i32 %for.lcssa -; -entry: - br label %loop - -loop: - %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ] - %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ] - %pivPlus1 = add nuw nsw i32 %piv, 1 - %cond = icmp ult i32 %piv, 510 - br i1 %cond, label %loop, label %exit - -exit: - ret i32 %for -} - ; PR46228: Vectorize w/o versioning for unit stride under optsize and enabled ; vectorization. @@ -253,7 +190,7 @@ define void @stride1(i16* noalias %B, i32 %BStride) optsize { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !21 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !19 ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -- 2.7.4