From dd61726d5bf30ee603fd2c471401d9b8107958d9 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 19 Sep 2022 14:44:39 +0200 Subject: [PATCH] Revert "[SimplifyCFG] accumulate bonus insts cost" This reverts commit e5581df60a35fffb0c69589777e4e126c849405f. This causes major compile-time regressions, about 2-3% end-to-end on CTMark. --- llvm/include/llvm/Transforms/Utils/Local.h | 27 +--------- llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 12 ++--- llvm/lib/Transforms/Utils/LoopSimplify.cpp | 3 +- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 63 +++++++--------------- llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll | 2 +- .../PhaseOrdering/X86/vector-reductions-logical.ll | 2 +- .../Transforms/SimplifyCFG/branch-fold-multiple.ll | 21 +++----- .../SimplifyCFG/branch-fold-threshold.ll | 8 +-- .../fold-branch-to-common-dest-two-preds-cost.ll | 4 +- 9 files changed, 41 insertions(+), 101 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index 1397420..4db697c 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -16,7 +16,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/ValueMap.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include @@ -165,26 +164,6 @@ bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, /// values, but instcombine orders them so it usually won't matter. bool EliminateDuplicatePHINodes(BasicBlock *BB); -/// Class to track cost of simplify CFG transformations. -class SimplifyCFGCostTracker { - /// Number of bonus instructions due to folding branches into predecessors. - /// E.g. folding - /// if (cond1) return false; - /// if (cond2) return false; - /// return true; - /// into - /// if (cond1 | cond2) return false; - /// return true; - /// In this case cond2 is always executed whereas originally it may be - /// evicted due to early exit of cond1. 'cond2' is called bonus instructions - /// and such bonus instructions could accumulate for unrolled loops, therefore - /// use a value map to accumulate their costs across transformations. - ValueMap NumBonusInsts; - -public: - void updateNumBonusInsts(BasicBlock *Parent, unsigned InstCount); - unsigned getNumBonusInsts(BasicBlock *Parent); -}; /// This function is used to do simplification of a CFG. For example, it /// adjusts branches to branches to eliminate the extra hop, it eliminates /// unreachable basic blocks, and does other peephole optimization of the CFG. @@ -195,8 +174,7 @@ extern cl::opt RequireAndPreserveDomTree; bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU = nullptr, const SimplifyCFGOptions &Options = {}, - ArrayRef LoopHeaders = {}, - SimplifyCFGCostTracker *CostTracker = nullptr); + ArrayRef LoopHeaders = {}); /// This function is used to flatten a CFG. For example, it uses parallel-and /// and parallel-or mode to collapse if-conditions and merge if-regions with @@ -206,8 +184,7 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr); /// If this basic block is ONLY a setcc and a branch, and if a predecessor /// branches to us and one of our successors, fold the setcc into the /// predecessor and use logical operations to pick the right destination. -bool FoldBranchToCommonDest(BranchInst *BI, SimplifyCFGCostTracker &CostTracker, - DomTreeUpdater *DTU = nullptr, +bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU = nullptr, MemorySSAUpdater *MSSAU = nullptr, const TargetTransformInfo *TTI = nullptr, unsigned BonusInstThreshold = 1); diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index e2646ed..fb2d812 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -221,8 +221,7 @@ static bool tailMergeBlocksWithSimilarFunctionTerminators(Function &F, /// iterating until no more changes are made. static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, - const SimplifyCFGOptions &Options, - SimplifyCFGCostTracker &CostTracker) { + const SimplifyCFGOptions &Options) { bool Changed = false; bool LocalChange = true; @@ -253,7 +252,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt)) ++BBIt; } - if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders, &CostTracker)) { + if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) { LocalChange = true; ++NumSimpl; } @@ -267,13 +266,11 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI, DominatorTree *DT, const SimplifyCFGOptions &Options) { DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - SimplifyCFGCostTracker CostTracker; bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr); EverChanged |= tailMergeBlocksWithSimilarFunctionTerminators(F, DT ? &DTU : nullptr); - EverChanged |= - iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options, CostTracker); + EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); // If neither pass changed anything, we're done. if (!EverChanged) return false; @@ -287,8 +284,7 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI, return true; do { - EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options, - CostTracker); + EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr); } while (EverChanged); diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 15f9a96..16085f4 100644 --- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -480,7 +480,6 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { - SimplifyCFGCostTracker CostTracker; bool Changed = false; if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); @@ -667,7 +666,7 @@ ReprocessLoop: // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. - if (!FoldBranchToCommonDest(BI, CostTracker, /*DTU=*/nullptr, MSSAU)) + if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU)) continue; // Success. The block is now dead, so remove it from the loop, diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 755f4be..fcdd858 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -207,21 +207,6 @@ STATISTIC(NumInvokes, STATISTIC(NumInvokesMerged, "Number of invokes that were merged together"); STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed"); -namespace llvm { - -void SimplifyCFGCostTracker::updateNumBonusInsts(BasicBlock *BB, - unsigned InstCount) { - auto Loc = NumBonusInsts.find(BB); - if (Loc == NumBonusInsts.end()) - Loc = NumBonusInsts.insert({BB, 0}).first; - Loc->second = Loc->second + InstCount; -} -unsigned SimplifyCFGCostTracker::getNumBonusInsts(BasicBlock *BB) { - return NumBonusInsts.lookup(BB); -} - -} // namespace llvm - namespace { // The first field contains the value that the switch produces when a certain @@ -258,10 +243,6 @@ class SimplifyCFGOpt { ArrayRef LoopHeaders; const SimplifyCFGOptions &Options; bool Resimplify; - // Accumulates number of bonus instructions due to merging basic blocks - // of common destination. - SimplifyCFGCostTracker *CostTracker; - SimplifyCFGCostTracker LocalCostTracker; Value *isValueEqualityComparison(Instruction *TI); BasicBlock *GetValueEqualityComparisonCases( @@ -305,10 +286,8 @@ class SimplifyCFGOpt { public: SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const DataLayout &DL, ArrayRef LoopHeaders, - const SimplifyCFGOptions &Opts, - SimplifyCFGCostTracker *CostTracker_) - : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts), - CostTracker(CostTracker_ ? CostTracker_ : &LocalCostTracker) { + const SimplifyCFGOptions &Opts) + : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) { assert((!DTU || !DTU->hasPostDomTree()) && "SimplifyCFG is not yet capable of maintaining validity of a " "PostDomTree, so don't ask for it."); @@ -3645,9 +3624,8 @@ static bool isVectorOp(Instruction &I) { /// If this basic block is simple enough, and if a predecessor branches to us /// and one of our successors, fold the block into the predecessor and use /// logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI, - SimplifyCFGCostTracker &CostTracker, - DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, +bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI, unsigned BonusInstThreshold) { // If this block ends with an unconditional branch, @@ -3719,6 +3697,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, // as "bonus instructions", and only allow this transformation when the // number of the bonus instructions we'll need to create when cloning into // each predecessor does not exceed a certain threshold. + unsigned NumBonusInsts = 0; bool SawVectorOp = false; const unsigned PredCount = Preds.size(); for (Instruction &I : *BB) { @@ -3737,13 +3716,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, // predecessor. Ignore free instructions. if (!TTI || TTI->getInstructionCost(&I, CostKind) != TargetTransformInfo::TCC_Free) { - for (auto PredBB : Preds) { - CostTracker.updateNumBonusInsts(PredBB, PredCount); - // Early exits once we reach the limit. - if (CostTracker.getNumBonusInsts(PredBB) > - BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier) - return false; - } + NumBonusInsts += PredCount; + + // Early exits once we reach the limit. + if (NumBonusInsts > + BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier) + return false; } auto IsBCSSAUse = [BB, &I](Use &U) { @@ -3757,12 +3735,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, if (!all_of(I.uses(), IsBCSSAUse)) return false; } - for (auto PredBB : Preds) { - if (CostTracker.getNumBonusInsts(PredBB) > - BonusInstThreshold * - (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1)) - return false; - } + if (NumBonusInsts > + BonusInstThreshold * + (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1)) + return false; // Ok, we have the budget. Perform the transformation. for (BasicBlock *PredBlock : Preds) { @@ -6913,7 +6889,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI, + if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); return false; @@ -6982,7 +6958,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI, + if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); @@ -7281,9 +7257,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const SimplifyCFGOptions &Options, - ArrayRef LoopHeaders, - SimplifyCFGCostTracker *CostTracker) { + ArrayRef LoopHeaders) { return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders, - Options, CostTracker) + Options) .run(BB); } diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll index 40493b4..fa39b77 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-inner.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -passes='require,loop-unroll,simplifycfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s +; RUN: opt < %s -S -passes='require,loop-unroll,simplifycfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s define void @basic(i32 %K, i32 %N) { ; CHECK-LABEL: @basic( diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll index c126dbc..05ef21d 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -bonus-inst-threshold=4 -O2 -S < %s | FileCheck %s +; RUN: opt -O2 -S < %s | FileCheck %s target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64--" diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold-multiple.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold-multiple.ll index f332cf8..fd40063 100644 --- a/llvm/test/Transforms/SimplifyCFG/branch-fold-multiple.ll +++ b/llvm/test/Transforms/SimplifyCFG/branch-fold-multiple.ll @@ -3,12 +3,9 @@ %struct.S = type { [4 x i32] } -; Check the second basic block is folded into the first basic block -; since it has one bonus intruction. The third basic block is not -; folded into the first basic block since the accumulated bonus -; instructions will exceed the default threshold of 1. The fourth basic -; block is foled into the third basic block since the accumulated -; bonus instruction cost is 1. +; Check the second, third, and fourth basic blocks are folded into +; the first basic block since each has one bonus intruction, which +; does not exceed the default bouns instruction threshold of 1. define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) { ; CHECK-LABEL: @test1( @@ -18,18 +15,14 @@ define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) { ; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[TMP1:%.*]], [[TMP1]] ; CHECK-NEXT: [[CMP2_1:%.*]] = icmp sgt i32 [[MUL1]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP2_1]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[FOR_COND_1:%.*]] -; CHECK: for.cond.1: ; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[TMP2:%.*]], [[TMP2]] ; CHECK-NEXT: [[CMP2_2:%.*]] = icmp sgt i32 [[MUL2]], 0 +; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP2_2]] ; CHECK-NEXT: [[MUL3:%.*]] = mul i32 [[TMP3:%.*]], [[TMP3]] ; CHECK-NEXT: [[CMP2_3:%.*]] = icmp sgt i32 [[MUL3]], 0 -; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[CMP2_2]], i1 true, i1 [[CMP2_3]] -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND1]], i1 false, i1 true -; CHECK-NEXT: br label [[CLEANUP]] -; CHECK: cleanup: -; CHECK-NEXT: [[CMP:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_COND_1]] ] -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP2_3]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND2]], i1 false, i1 true +; CHECK-NEXT: ret i1 [[SPEC_SELECT]] ; entry: %mul0 = mul i32 %0, %0 diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll index 0482fa5..c1fd267 100644 --- a/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll +++ b/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll @@ -1,9 +1,9 @@ ; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=NORMAL -; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=3 | FileCheck %s --check-prefix=AGGRESSIVE -; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=6 | FileCheck %s --check-prefix=WAYAGGRESSIVE +; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE +; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=4 | FileCheck %s --check-prefix=WAYAGGRESSIVE ; RUN: opt %s -passes=simplifycfg -S | FileCheck %s --check-prefix=NORMAL -; RUN: opt %s -passes='simplifycfg' -S | FileCheck %s --check-prefix=AGGRESSIVE -; RUN: opt %s -passes='simplifycfg' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE +; RUN: opt %s -passes='simplifycfg' -S | FileCheck %s --check-prefix=AGGRESSIVE +; RUN: opt %s -passes='simplifycfg' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) { ; NORMAL-LABEL: @foo( diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-two-preds-cost.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-two-preds-cost.ll index 6b8ebd9..71b8ef5c 100644 --- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-two-preds-cost.ll +++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-two-preds-cost.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=1 | FileCheck --check-prefixes=ALL,THR1 %s -; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=3 | FileCheck --check-prefixes=ALL,THR2 %s +; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=2 | FileCheck --check-prefixes=ALL,THR2 %s declare void @sideeffect0() declare void @sideeffect1() @@ -10,7 +10,7 @@ declare i1 @gen1() ; Here we'd want to duplicate %v3_adj into two predecessors, ; but -bonus-inst-threshold=1 says that we can only clone it into one. -; With -bonus-inst-threshold=3 we can clone it into both though. +; With -bonus-inst-threshold=2 we can clone it into both though. define void @two_preds_with_extra_op(i8 %v0, i8 %v1, i8 %v2, i8 %v3) { ; THR1-LABEL: @two_preds_with_extra_op( ; THR1-NEXT: entry: -- 2.7.4