#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/ValueMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
#include <cstdint>
/// values, but instcombine orders them so it usually won't matter.
bool EliminateDuplicatePHINodes(BasicBlock *BB);
+/// Tracks the cost that SimplifyCFG transformations add to each basic block.
+class SimplifyCFGCostTracker {
+ /// Number of bonus instructions due to folding branches into predecessors,
+ /// keyed by basic block; updateNumBonusInsts() adds to a block's total and
+ /// getNumBonusInsts() reads it back. E.g. folding
+ /// if (cond1) return false;
+ /// if (cond2) return false;
+ /// return true;
+ /// into
+ /// if (cond1 | cond2) return false;
+ /// return true;
+ /// makes cond2 execute unconditionally, whereas originally the early exit on
+ /// cond1 could skip it. Such instructions are called bonus instructions; they
+ /// can accumulate (e.g. for unrolled loops), so a value map keeps the totals.
+ ValueMap<BasicBlock *, unsigned> NumBonusInsts;
+
+public:
+ void updateNumBonusInsts(BasicBlock *Parent, unsigned InstCount);
+ unsigned getNumBonusInsts(BasicBlock *Parent);
+};
/// This function is used to do simplification of a CFG. For example, it
/// adjusts branches to branches to eliminate the extra hop, it eliminates
/// unreachable basic blocks, and does other peephole optimization of the CFG.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
DomTreeUpdater *DTU = nullptr,
const SimplifyCFGOptions &Options = {},
- ArrayRef<WeakVH> LoopHeaders = {});
+ ArrayRef<WeakVH> LoopHeaders = {},
+ SimplifyCFGCostTracker *CostTracker = nullptr);
/// This function is used to flatten a CFG. For example, it uses parallel-and
/// and parallel-or mode to collapse if-conditions and merge if-regions with
/// If this basic block is ONLY a setcc and a branch, and if a predecessor
/// branches to us and one of our successors, fold the setcc into the
/// predecessor and use logical operations to pick the right destination.
-bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU = nullptr,
+bool FoldBranchToCommonDest(BranchInst *BI, SimplifyCFGCostTracker &CostTracker,
+ DomTreeUpdater *DTU = nullptr,
MemorySSAUpdater *MSSAU = nullptr,
const TargetTransformInfo *TTI = nullptr,
unsigned BonusInstThreshold = 1);
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
DomTreeUpdater *DTU,
- const SimplifyCFGOptions &Options) {
+ const SimplifyCFGOptions &Options,
+ SimplifyCFGCostTracker &CostTracker) {
bool Changed = false;
bool LocalChange = true;
while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt))
++BBIt;
}
- if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) {
+ if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders, &CostTracker)) {
LocalChange = true;
++NumSimpl;
}
DominatorTree *DT,
const SimplifyCFGOptions &Options) {
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ SimplifyCFGCostTracker CostTracker;
bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr);
EverChanged |=
tailMergeBlocksWithSimilarFunctionTerminators(F, DT ? &DTU : nullptr);
- EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
+ EverChanged |=
+ iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options, CostTracker);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
+ EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options,
+ CostTracker);
EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr);
} while (EverChanged);
DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
+ SimplifyCFGCostTracker CostTracker;
bool Changed = false;
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
// The block has now been cleared of all instructions except for
// a comparison and a conditional branch. SimplifyCFG may be able
// to fold it now.
- if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU))
+ if (!FoldBranchToCommonDest(BI, CostTracker, /*DTU=*/nullptr, MSSAU))
continue;
// Success. The block is now dead, so remove it from the loop,
STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
+namespace llvm {
+
+/// Add \p InstCount to the bonus-instruction total recorded for \p BB.
+void SimplifyCFGCostTracker::updateNumBonusInsts(BasicBlock *BB,
+                                                 unsigned InstCount) {
+  // operator[] creates a zero-valued entry the first time BB is seen, so a
+  // single lookup covers both the "new block" and "existing block" cases.
+  NumBonusInsts[BB] += InstCount;
+}
+
+/// Return the bonus-instruction total recorded for \p BB.
+unsigned SimplifyCFGCostTracker::getNumBonusInsts(BasicBlock *BB) {
+  // lookup() yields a default-constructed count (0) for an unrecorded block
+  // and does not insert an entry.
+  return NumBonusInsts.lookup(BB);
+}
+
+} // namespace llvm
+
namespace {
// The first field contains the value that the switch produces when a certain
ArrayRef<WeakVH> LoopHeaders;
const SimplifyCFGOptions &Options;
bool Resimplify;
+ // Accumulates the number of bonus instructions created by folding branches
+ // to a common destination into their predecessors.
+ SimplifyCFGCostTracker *CostTracker;
+ SimplifyCFGCostTracker LocalCostTracker;
Value *isValueEqualityComparison(Instruction *TI);
BasicBlock *GetValueEqualityComparisonCases(
public:
SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
- const SimplifyCFGOptions &Opts)
- : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
+ const SimplifyCFGOptions &Opts,
+ SimplifyCFGCostTracker *CostTracker_)
+ : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts),
+ CostTracker(CostTracker_ ? CostTracker_ : &LocalCostTracker) {
assert((!DTU || !DTU->hasPostDomTree()) &&
"SimplifyCFG is not yet capable of maintaining validity of a "
"PostDomTree, so don't ask for it.");
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
- MemorySSAUpdater *MSSAU,
+bool llvm::FoldBranchToCommonDest(BranchInst *BI,
+ SimplifyCFGCostTracker &CostTracker,
+ DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU,
const TargetTransformInfo *TTI,
unsigned BonusInstThreshold) {
// If this block ends with an unconditional branch,
// as "bonus instructions", and only allow this transformation when the
// number of the bonus instructions we'll need to create when cloning into
// each predecessor does not exceed a certain threshold.
- unsigned NumBonusInsts = 0;
bool SawVectorOp = false;
const unsigned PredCount = Preds.size();
for (Instruction &I : *BB) {
// predecessor. Ignore free instructions.
if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
TargetTransformInfo::TCC_Free) {
- NumBonusInsts += PredCount;
-
- // Early exits once we reach the limit.
- if (NumBonusInsts >
- BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
- return false;
+ for (auto PredBB : Preds) {
+ CostTracker.updateNumBonusInsts(PredBB, PredCount);
+ // Early exits once we reach the limit.
+ if (CostTracker.getNumBonusInsts(PredBB) >
+ BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
+ return false;
+ }
}
auto IsBCSSAUse = [BB, &I](Use &U) {
if (!all_of(I.uses(), IsBCSSAUse))
return false;
}
- if (NumBonusInsts >
- BonusInstThreshold *
- (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
- return false;
+ for (auto PredBB : Preds) {
+ if (CostTracker.getNumBonusInsts(PredBB) >
+ BonusInstThreshold *
+ (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
+ return false;
+ }
// Ok, we have the budget. Perform the transformation.
for (BasicBlock *PredBlock : Preds) {
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
+ if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI,
Options.BonusInstThreshold))
return requestResimplify();
return false;
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
+ if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI,
Options.BonusInstThreshold))
return requestResimplify();
bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
- ArrayRef<WeakVH> LoopHeaders) {
+ ArrayRef<WeakVH> LoopHeaders,
+ SimplifyCFGCostTracker *CostTracker) {
return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
- Options)
+ Options, CostTracker)
.run(BB);
}
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll<peeling;no-runtime>,simplifycfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll<peeling;no-runtime>,simplifycfg<bonus-inst-threshold=3>,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
define void @basic(i32 %K, i32 %N) {
; CHECK-LABEL: @basic(
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -O2 -S < %s | FileCheck %s
+; RUN: opt -bonus-inst-threshold=4 -O2 -S < %s | FileCheck %s
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64--"
%struct.S = type { [4 x i32] }
-; Check the second, third, and fourth basic blocks are folded into
-; the first basic block since each has one bonus intruction, which
-; does not exceed the default bouns instruction threshold of 1.
+; Check the second basic block is folded into the first basic block
+; since it has one bonus instruction. The third basic block is not
+; folded into the first basic block since the accumulated bonus
+; instructions will exceed the default threshold of 1. The fourth basic
+; block is folded into the third basic block since the accumulated
+; bonus instruction cost is 1.
define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[TMP1:%.*]], [[TMP1]]
; CHECK-NEXT: [[CMP2_1:%.*]] = icmp sgt i32 [[MUL1]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP2_1]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[FOR_COND_1:%.*]]
+; CHECK: for.cond.1:
; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[TMP2:%.*]], [[TMP2]]
; CHECK-NEXT: [[CMP2_2:%.*]] = icmp sgt i32 [[MUL2]], 0
-; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP2_2]]
; CHECK-NEXT: [[MUL3:%.*]] = mul i32 [[TMP3:%.*]], [[TMP3]]
; CHECK-NEXT: [[CMP2_3:%.*]] = icmp sgt i32 [[MUL3]], 0
-; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP2_3]]
-; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND2]], i1 false, i1 true
-; CHECK-NEXT: ret i1 [[SPEC_SELECT]]
+; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[CMP2_2]], i1 true, i1 [[CMP2_3]]
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND1]], i1 false, i1 true
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: [[CMP:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_COND_1]] ]
+; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
%mul0 = mul i32 %0, %0
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=NORMAL
-; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE
-; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=4 | FileCheck %s --check-prefix=WAYAGGRESSIVE
+; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=3 | FileCheck %s --check-prefix=AGGRESSIVE
+; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=6 | FileCheck %s --check-prefix=WAYAGGRESSIVE
; RUN: opt %s -passes=simplifycfg -S | FileCheck %s --check-prefix=NORMAL
-; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=2>' -S | FileCheck %s --check-prefix=AGGRESSIVE
-; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=4>' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE
+; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=3>' -S | FileCheck %s --check-prefix=AGGRESSIVE
+; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=6>' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) {
; NORMAL-LABEL: @foo(
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=1 | FileCheck --check-prefixes=ALL,THR1 %s
-; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=2 | FileCheck --check-prefixes=ALL,THR2 %s
+; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=3 | FileCheck --check-prefixes=ALL,THR2 %s
declare void @sideeffect0()
declare void @sideeffect1()
; Here we'd want to duplicate %v3_adj into two predecessors,
; but -bonus-inst-threshold=1 says that we can only clone it into one.
-; With -bonus-inst-threshold=2 we can clone it into both though.
+; With -bonus-inst-threshold=3 we can clone it into both though.
define void @two_preds_with_extra_op(i8 %v0, i8 %v1, i8 %v2, i8 %v3) {
; THR1-LABEL: @two_preds_with_extra_op(
; THR1-NEXT: entry: