From 6262ca3448e2698e6c65d911f13fc22e4cc40be8 Mon Sep 17 00:00:00 2001 From: Kyle Butt Date: Wed, 24 Aug 2016 21:34:24 +0000 Subject: [PATCH] IfConversion: Rescan diamonds. The cost of predicating a diamond is only the instructions that are not shared between the two branches. Additionally If a predicate clobbering instruction occurs in the shared portion of the branches (e.g. a cond move), it may still be possible to if convert the sub-cfg. This change handles these two facts by rescanning the non-shared portion of a diamond sub-cfg to recalculate both the predication cost and whether both blocks are pred-clobbering. Fixed 2 bugs before recommitting. Branch instructions must be compared and found identical before diamond conversion. Also, predicate-clobbering instructions in the shared prefix disqualifies a potential diamond conversion. Includes tests for both. llvm-svn: 279670 --- llvm/lib/CodeGen/IfConversion.cpp | 214 ++++++++++++++++----- llvm/test/CodeGen/ARM/indirectbr-3.ll | 12 +- .../CodeGen/Thumb2/ifcvt-rescan-bug-2016-08-22.ll | 36 ++++ 3 files changed, 208 insertions(+), 54 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/ifcvt-rescan-bug-2016-08-22.ll diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index 0f7bc65..6a7fd67 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -149,11 +149,15 @@ namespace { struct IfcvtToken { BBInfo &BBI; IfcvtKind Kind; - bool NeedSubsumption; unsigned NumDups; unsigned NumDups2; - IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0) - : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} + bool NeedSubsumption : 1; + bool TClobbersPred : 1; + bool FClobbersPred : 1; + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0, + bool tc = false, bool fc = false) + : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s), + TClobbersPred(tc), FClobbersPred(fc) {} }; /// Results of if-conversion feasibility analysis indexed by basic block @@ -202,16 +206,29 @@ namespace { bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, BranchProbability Prediction) const; + bool CountDuplicatedInstructions( + MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, + MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, + unsigned &Dups1, unsigned &Dups2, + MachineBasicBlock &TBB, MachineBasicBlock &FBB, + bool SkipUnconditionalBranches) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, - unsigned &Dups1, unsigned &Dups2) const; + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const; void AnalyzeBranches(BBInfo &BBI); void ScanInstructions(BBInfo &BBI, MachineBasicBlock::iterator &Begin, - MachineBasicBlock::iterator &End) const; + MachineBasicBlock::iterator &End, + bool BranchUnpredicable = false) const; + bool RescanInstructions( + MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, + MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, + BBInfo &TrueBBI, BBInfo &FalseBBI) const; void AnalyzeBlock(MachineBasicBlock &MBB, std::vector> &Tokens); bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl &Cond, - bool isTriangle = false, bool RevBranch = false); + bool isTriangle = false, bool RevBranch = false, + bool hasCommonTail = false); void AnalyzeBlocks(MachineFunction &MF, std::vector> &Tokens); void InvalidatePreds(MachineBasicBlock &MBB); @@ -219,7 +236,8 @@ namespace { bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, - unsigned NumDups1, unsigned NumDups2); + unsigned NumDups1, unsigned NumDups2, + bool TClobbers, bool FClobbers); void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl &Cond, @@ -406,7 +424,9 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" << BBI.TrueBB->getNumber() << ",F:" << BBI.FalseBB->getNumber() << ") "); - RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); + RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2, + Token->TClobbersPred, + Token->FClobbersPred); DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) ++NumDiamonds; break; @@ -591,14 +611,22 @@ static inline bool skipDebugInstructionsBackward( /// finally point to the first shared instruction in the tail. /// Upon return [TIB, TIE), and [FIB, FIE) mark the un-duplicated portions of /// two blocks. -static void countDuplicatedInstructions( +/// @param Dups1 count of duplicated instructions at the beginning of the 2 +/// blocks. +/// @param Dups2 count of duplicated instructions at the end of the 2 blocks. +/// @param SkipUnconditionalBranches if true, Don't make sure that +/// unconditional branches at the end of the blocks are the same. True is +/// passed when the blocks are analyzable to allow for fallthrough to be +/// handled. +/// @return false if the shared portion prevents if conversion. +bool IfConverter::CountDuplicatedInstructions( MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, unsigned &Dups1, unsigned &Dups2, MachineBasicBlock &TBB, MachineBasicBlock &FBB, - bool SkipConditionalBranches) { + bool SkipUnconditionalBranches) const { while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. @@ -608,6 +636,11 @@ static void countDuplicatedInstructions( break; if (!TIB->isIdenticalTo(*FIB)) break; + // A pred-clobbering instruction in the shared portion prevents + // if-conversion. + std::vector PredDefs; + if (TII->DefinesPredicate(*TIB, PredDefs)) + return false; ++Dups1; ++TIB; ++FIB; @@ -618,7 +651,7 @@ static void countDuplicatedInstructions( // can be left unpredicated. // Check for already containing all of the block. if (TIB == TIE || FIB == FIE) - return; + return true; // Now, in preparation for counting duplicate instructions at the ends of the // blocks, move the end iterators up past any branch instructions. --TIE; @@ -641,12 +674,7 @@ static void countDuplicatedInstructions( }); if (!TBB.succ_empty() || !FBB.succ_empty()) { - if (SkipConditionalBranches) { - while (!TEmpty && TIE->isBranch()) - shrinkInclusiveRange(TIB, TIE, TEmpty); - while (!FEmpty && FIE->isBranch()) - shrinkInclusiveRange(FIB, FIE, FEmpty); - } else { + if (SkipUnconditionalBranches) { while (!TEmpty && TIE->isUnconditionalBranch()) shrinkInclusiveRange(TIB, TIE, TEmpty); while (!FEmpty && FIE->isUnconditionalBranch()) @@ -657,7 +685,7 @@ static void countDuplicatedInstructions( // If Dups1 includes all of a block, then don't count duplicate // instructions at the end of the blocks. if (TEmpty || FEmpty) - return; + return true; // Count duplicate instructions at the ends of the blocks. while (!TEmpty && !FEmpty) { @@ -668,19 +696,48 @@ static void countDuplicatedInstructions( break; if (!TIE->isIdenticalTo(*FIE)) break; - // If we are trying to make sure the conditional branches are the same, we - // still don't want to count them. - if (SkipConditionalBranches || !TIE->isBranch()) + // We have to verify that any branch instructions are the same, and then we + // don't count them toward the # of duplicate instructions. + if (!TIE->isBranch()) ++Dups2; shrinkInclusiveRange(TIB, TIE, TEmpty); shrinkInclusiveRange(FIB, FIE, FEmpty); } + return true; +} + +/// RescanInstructions - Run ScanInstructions on a pair of blocks. +/// @param TIB - True Iterator Begin, points to first non-shared instruction +/// @param FIB - False Iterator Begin, points to first non-shared instruction +/// @param TIE - True Iterator End, points past last non-shared instruction +/// @param FIE - False Iterator End, points past last non-shared instruction +/// @param TrueBBI - BBInfo to update for the true block. +/// @param FalseBBI - BBInfo to update for the false block. +/// @returns - false if either block cannot be predicated or if both blocks end +/// with a predicate-clobbering instruction. +bool IfConverter::RescanInstructions( + MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, + MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, + BBInfo &TrueBBI, BBInfo &FalseBBI) const { + bool BranchUnpredicable = true; + TrueBBI.IsUnpredicable = FalseBBI.IsUnpredicable = false; + ScanInstructions(TrueBBI, TIB, TIE, BranchUnpredicable); + if (TrueBBI.IsUnpredicable) + return false; + ScanInstructions(FalseBBI, FIB, FIE, BranchUnpredicable); + if (FalseBBI.IsUnpredicable) + return false; + if (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred) + return false; + return true; } /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along /// with their common predecessor) forms a valid diamond shape for ifcvt. -bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, - unsigned &Dups1, unsigned &Dups2) const { +bool IfConverter::ValidDiamond( + BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const { Dups1 = Dups2 = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) @@ -701,19 +758,33 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, return false; // FIXME: Allow true block to have an early exit? - if (TrueBBI.FalseBB || FalseBBI.FalseBB || - (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) + if (TrueBBI.FalseBB || FalseBBI.FalseBB) return false; // Count duplicate instructions at the beginning and end of the true and // false blocks. + // Skip unconditional branches only if we are considering an analyzable + // diamond. Otherwise the branches must be the same. + bool SkipUnconditionalBranches = + TrueBBI.IsBrAnalyzable && FalseBBI.IsBrAnalyzable; MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); - countDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, - *TrueBBI.BB, *FalseBBI.BB, - /* SkipConditionalBranches */ true); + if(!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, + *TrueBBI.BB, *FalseBBI.BB, + SkipUnconditionalBranches)) + return false; + + TrueBBICalc.BB = TrueBBI.BB; + FalseBBICalc.BB = FalseBBI.BB; + if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) + return false; + // The size is used to decide whether to if-convert, and the shared portions + // are subtracted off. Because of the subtraction, we just use the size that + // was calculated by the original ScanInstructions, as it is correct. + TrueBBICalc.NonPredSize = TrueBBI.NonPredSize; + FalseBBICalc.NonPredSize = FalseBBI.NonPredSize; return true; } @@ -748,7 +819,8 @@ void IfConverter::AnalyzeBranches(BBInfo &BBI) { /// If so, the block is not predicable unless it's the last instruction. void IfConverter::ScanInstructions(BBInfo &BBI, MachineBasicBlock::iterator &Begin, - MachineBasicBlock::iterator &End) const { + MachineBasicBlock::iterator &End, + bool BranchUnpredicable) const { if (BBI.IsDone || BBI.IsUnpredicable) return; @@ -798,6 +870,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI, bool isPredicated = TII->isPredicated(MI); bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch(); + if (BranchUnpredicable && MI.isBranch()) { + BBI.IsUnpredicable = true; + return; + } + // A conditional branch is not predicable, but it may be eliminated. if (isCondBr) continue; @@ -841,11 +918,22 @@ void IfConverter::ScanInstructions(BBInfo &BBI, /// Determine if the block is a suitable candidate to be predicated by the /// specified predicate. +/// @param BBI BBInfo for the block to check +/// @param Pred Predicate array for the branch that leads to BBI +/// @param isTriangle true if the Analysis is for a triangle +/// @param RevBranch true if Reverse(Pred) leads to BBI (e.g. BBI is the false +/// case +/// @param hasCommonTail true if BBI shares a tail with a sibling block that +/// contains any instruction that would make the block unpredicable. bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl &Pred, - bool isTriangle, bool RevBranch) { + bool isTriangle, bool RevBranch, + bool hasCommonTail) { // If the block is dead or unpredicable, then it cannot be predicated. - if (BBI.IsDone || BBI.IsUnpredicable) + // Two blocks may share a common unpredicable tail, but this doesn't prevent + // them from being if-converted. The non-shared portion is assumed to have + // been checked + if (BBI.IsDone || (BBI.IsUnpredicable && !hasCommonTail)) return false; // If it is already predicated but we couldn't analyze its terminator, the @@ -859,7 +947,7 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) return false; - if (BBI.BrCond.size()) { + if (!hasCommonTail && BBI.BrCond.size()) { if (!isTriangle) return false; @@ -966,25 +1054,37 @@ void IfConverter::AnalyzeBlock( BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB); - if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && + if (CanRevCond) { + BBInfo TrueBBICalc, FalseBBICalc; + if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2, + TrueBBICalc, FalseBBICalc) && MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + - TrueBBI.ExtraCost), TrueBBI.ExtraCost2, + TrueBBICalc.ExtraCost), + TrueBBICalc.ExtraCost2, *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + - FalseBBI.ExtraCost),FalseBBI.ExtraCost2, - Prediction) && - FeasibilityAnalysis(TrueBBI, BBI.BrCond) && - FeasibilityAnalysis(FalseBBI, RevCond)) { - // Diamond: - // EBB - // / \_ - // | | - // TBB FBB - // \ / - // TailBB - // Note TailBB can be empty. - Tokens.push_back(llvm::make_unique( - BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2)); - Enqueued = true; + FalseBBICalc.ExtraCost), + FalseBBICalc.ExtraCost2, + Prediction) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond, + /* IsTriangle */ false, /* RevCond */ false, + /* hasCommonTail */ true) && + FeasibilityAnalysis(FalseBBI, RevCond, + /* IsTriangle */ false, /* RevCond */ false, + /* hasCommonTail */ true)) { + // Diamond: + // EBB + // / \_ + // | | + // TBB FBB + // \ / + // TailBB + // Note TailBB can be empty. + Tokens.push_back(llvm::make_unique( + BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2, + (bool) TrueBBICalc.ClobbersPred, + (bool) FalseBBICalc.ClobbersPred)); + Enqueued = true; + } } if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) && @@ -1429,8 +1529,18 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { } /// If convert a diamond sub-CFG. +/// \p BBI is the head of the diamond +/// \p NumDups1 - number of shared instructions at the beginning of TrueBBI and +/// FalseBBI +/// \p NumDups2 - number of shared instructions at the end of TrueBBI and +/// FalseBBI +/// \p TClobbersPred - True if the true block clobbers the predicate in the +/// non-shared portion. +/// \p TClobbersPred - True if the false block clobbers the predicate in the +/// non-shared portion. bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, - unsigned NumDups1, unsigned NumDups2) { + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred) { BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; MachineBasicBlock *TailBB = TrueBBI.TrueBB; @@ -1468,9 +1578,9 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Figure out the more profitable ordering. bool DoSwap = false; - if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred) + if (TClobbersPred && !FClobbersPred) DoSwap = true; - else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) { + else if (TClobbersPred == FClobbersPred) { if (TrueBBI.NonPredSize > FalseBBI.NonPredSize) DoSwap = true; } diff --git a/llvm/test/CodeGen/ARM/indirectbr-3.ll b/llvm/test/CodeGen/ARM/indirectbr-3.ll index 291fedb..da383989 100644 --- a/llvm/test/CodeGen/ARM/indirectbr-3.ll +++ b/llvm/test/CodeGen/ARM/indirectbr-3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 -stats 2>&1 | FileCheck %s ; If ARMBaseInstrInfo::AnalyzeBlocks returns the wrong value, which was possible ; for blocks with indirect branches, the IfConverter could end up deleting @@ -9,9 +9,17 @@ define i32 @preserve_blocks(i32 %x) { ; preserve_blocks: ; CHECK: Block address taken -; CHECK: movs r0, #2 ; CHECK: movs r0, #1 +; CHECK: Block address taken +; CHECK: movs r0, #2 ; CHECK-NOT: Address of block that was removed by CodeGen + +; Separate bug. There are no valid diamonds to if-convert in this file. +; There was a bug in the if-conversion code that would if-convert a false +; diamond where one side had a return and the other had an indirect branch. +; Make sure no diamond conversions occurred while compiling this file. +; CHECK: Statistics Collected +; CHECK-NOT: 1 ifcvt - Number of diamond if-conversions performed entry: %c2 = icmp slt i32 %x, 3 %blockaddr = select i1 %c2, i8* blockaddress(@preserve_blocks, %ibt1), i8* blockaddress(@preserve_blocks, %ibt2) diff --git a/llvm/test/CodeGen/Thumb2/ifcvt-rescan-bug-2016-08-22.ll b/llvm/test/CodeGen/Thumb2/ifcvt-rescan-bug-2016-08-22.ll new file mode 100644 index 0000000..ae3084d --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/ifcvt-rescan-bug-2016-08-22.ll @@ -0,0 +1,36 @@ +; RUN: llc -O2 -o - %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7-unknown-linux-gnueabihf" + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @_ZNSaIcEC2Ev() unnamed_addr #0 align 2 + +declare void @_ZNSsC1EPKcRKSaIcE() unnamed_addr #0 + +; It isn't valid to If-Convert the following function, even though the calls +; are in common. The calls clobber the predicate info. +; CHECK: cbnz r{{[0-9]+}}, .LBB0_2 +; CHECK: BB#1 +; CHECK: .LBB0_2 +; Function Attrs: nounwind +define hidden void @_ZN4llvm14DOTGraphTraitsIPNS_13ScheduleDAGMIEE17getEdgeAttributesEPKNS_5SUnitENS_13SUnitIteratorEPKNS_11ScheduleDAGE() #0 align 2 { + br i1 undef, label %1, label %2 + +;