From a8c7371d16032ea158e67d14c6adde1759871d1d Mon Sep 17 00:00:00 2001 From: Kyle Butt Date: Wed, 24 Aug 2016 21:34:27 +0000 Subject: [PATCH] CodeGen: If Convert blocks that would form a diamond when tail-merged. The following function currently relies on tail-merging for if conversion to succeed. The common tail of cond_true and cond_false is extracted, and this then forms a diamond pattern that can be successfully if converted. If this block does not get extracted, either because tail-merging is disabled or the threshold is higher, we should still recognize this pattern and if-convert it. Fixed a regression in the original commit. Need to un-reverse branches after reversing them, or other conversions go awry. define i32 @t2(i32 %a, i32 %b) nounwind { entry: %tmp1434 = icmp eq i32 %a, %b ; [#uses=1] br i1 %tmp1434, label %bb17, label %bb.outer bb.outer: ; preds = %cond_false, %entry %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] br label %bb bb: ; preds = %cond_true, %bb.outer %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] %tmp. = sub i32 0, %b_addr.021.0.ph %tmp.40 = mul i32 %indvar, %tmp. %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph br i1 %tmp3, label %cond_true, label %cond_false cond_true: ; preds = %bb %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph %indvar.next = add i32 %indvar, 1 br i1 %tmp1437, label %bb17, label %bb cond_false: ; preds = %bb %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 br i1 %tmp14, label %bb17, label %bb.outer bb17: ; preds = %cond_false, %cond_true, %entry %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ret i32 %a_addr.026.1 } Without tail-merging or diamond-tail if conversion: LBB1_1: @ %bb @ =>This Inner Loop Header: Depth=1 cmp r0, r1 ble LBB1_3 @ BB#2: @ %cond_true @ in Loop: Header=BB1_1 Depth=1 subs r0, r0, r1 cmp r1, r0 it ne cmpne r0, r1 bgt LBB1_4 LBB1_3: @ %cond_false @ in Loop: Header=BB1_1 Depth=1 subs r1, r1, r0 cmp r1, r0 bne LBB1_1 LBB1_4: @ %bb17 bx lr With diamond-tail if conversion, but without tail-merging: @ BB#0: @ %entry cmp r0, r1 it eq bxeq lr LBB1_1: @ %bb @ =>This Inner Loop Header: Depth=1 cmp r0, r1 ite le suble r1, r1, r0 subgt r0, r0, r1 cmp r1, r0 bne LBB1_1 @ BB#2: @ %bb17 bx lr llvm-svn: 279671 --- llvm/lib/CodeGen/IfConversion.cpp | 351 ++++++++++++++++----- .../CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll | 36 +++ llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll | 47 ++- 3 files changed, 356 insertions(+), 78 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index 6a7fd67..9cddcfa 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -59,6 +59,8 @@ static cl::opt DisableTriangleFR("disable-ifcvt-triangle-false-rev", cl::init(false), cl::Hidden); static cl::opt DisableDiamond("disable-ifcvt-diamond", cl::init(false), cl::Hidden); +static cl::opt DisableForkedDiamond("disable-ifcvt-forked-diamond", + cl::init(false), cl::Hidden); static cl::opt IfCvtBranchFold("ifcvt-branch-fold", cl::init(true), cl::Hidden); @@ -69,6 +71,7 @@ STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed"); STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed"); STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); +STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed"); STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); STATISTIC(NumDupBBs, "Number of duplicated blocks"); STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated"); @@ -83,7 +86,9 @@ namespace { ICTriangleRev, // Same as ICTriangle, but true path rev condition. ICTriangleFalse, // Same as ICTriangle, but on the false path. ICTriangle, // BB is entry of a triangle sub-CFG. - ICDiamond // BB is entry of a diamond sub-CFG. + ICDiamond, // BB is entry of a diamond sub-CFG. + ICForkedDiamond // BB is entry of an almost diamond sub-CFG, with a + // common tail that can be shared. }; /// One per MachineBasicBlock, this is used to cache the result @@ -115,6 +120,7 @@ namespace { bool IsAnalyzed : 1; bool IsEnqueued : 1; bool IsBrAnalyzable : 1; + bool IsBrReversible : 1; bool HasFallThrough : 1; bool IsUnpredicable : 1; bool CannotBeCopied : 1; @@ -129,9 +135,10 @@ namespace { SmallVector Predicate; BBInfo() : IsDone(false), IsBeingAnalyzed(false), IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), - HasFallThrough(false), IsUnpredicable(false), - CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), - ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr), + IsBrReversible(false), HasFallThrough(false), + IsUnpredicable(false), CannotBeCopied(false), + ClobbersPred(false), NonPredSize(0), ExtraCost(0), + ExtraCost2(0), BB(nullptr), TrueBB(nullptr), FalseBB(nullptr) {} }; @@ -215,6 +222,9 @@ namespace { bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, unsigned &Dups1, unsigned &Dups2, BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const; + bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const; void AnalyzeBranches(BBInfo &BBI); void ScanInstructions(BBInfo &BBI, MachineBasicBlock::iterator &Begin, @@ -235,9 +245,17 @@ namespace { void RemoveExtraEdges(BBInfo &BBI); bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred, + bool RemoveTrueBranch, bool RemoveFalseBranch, + bool MergeAddEdges); bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, unsigned NumDups1, unsigned NumDups2, bool TClobbers, bool FClobbers); + bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbers, bool FClobbers); void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl &Cond, @@ -431,6 +449,19 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (RetVal) ++NumDiamonds; break; } + case ICForkedDiamond: { + if (DisableForkedDiamond) break; + DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#" + << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2, + Token->TClobbersPred, + Token->FClobbersPred); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) ++NumForkedDiamonds; + break; + } } Change |= RetVal; @@ -646,9 +677,6 @@ bool IfConverter::CountDuplicatedInstructions( ++FIB; } - // If both blocks are returning don't skip the branches, since they will - // likely be both identical return instructions. In such cases the return - // can be left unpredicated. // Check for already containing all of the block. if (TIB == TIE || FIB == FIE) return true; @@ -732,6 +760,97 @@ bool IfConverter::RescanInstructions( return true; } +/// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) form a diamond if a common tail block is +/// extracted. +/// While not strictly a diamond, this pattern would form a diamond if +/// tail-merging had merged the shared tails. +/// EBB +/// _/ \_ +/// | | +/// TBB FBB +/// / \ / \ +/// FalseBB TrueBB FalseBB +/// Currently only handles analyzable branches. +/// Specifically excludes actual diamonds to avoid overlap. +bool IfConverter::ValidForkedDiamond( + BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable) + return false; + // Don't IfConvert blocks that can't be folded into their predecessor. + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // This function is specifically looking for conditional tails, as + // unconditional tails are already handled by the standard diamond case. + if (TrueBBI.BrCond.size() == 0 || + FalseBBI.BrCond.size() == 0) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *TF = TrueBBI.FalseBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + MachineBasicBlock *FF = FalseBBI.FalseBB; + + if (!TT) + TT = getNextBlock(*TrueBBI.BB); + if (!TF) + TF = getNextBlock(*TrueBBI.BB); + if (!FT) + FT = getNextBlock(*FalseBBI.BB); + if (!FF) + FF = getNextBlock(*FalseBBI.BB); + + if (!TT || !TF) + return false; + + // Check successors. If they don't match, bail. + if (!((TT == FT && TF == FF) || (TF == FT && TT == FF))) + return false; + + bool FalseReversed = false; + if (TF == FT && TT == FF) { + // If the branches are opposing, but we can't reverse, don't do it. + if (!FalseBBI.IsBrReversible) + return false; + FalseReversed = true; + ReverseBranchCondition(FalseBBI); + } + auto UnReverseOnExit = make_scope_exit([&]() { + if (FalseReversed) + ReverseBranchCondition(FalseBBI); + }); + + // Count duplicate instructions at the beginning of the true and false blocks. + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + if(!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, + *TrueBBI.BB, *FalseBBI.BB, + /* SkipUnconditionalBranches */ true)) + return false; + + TrueBBICalc.BB = TrueBBI.BB; + FalseBBICalc.BB = FalseBBI.BB; + if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) + return false; + + // The size is used to decide whether to if-convert, and the shared portions + // are subtracted off. Because of the subtraction, we just use the size that + // was calculated by the original ScanInstructions, as it is correct. + TrueBBICalc.NonPredSize = TrueBBI.NonPredSize; + FalseBBICalc.NonPredSize = FalseBBI.NonPredSize; + return true; +} + /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along /// with their common predecessor) forms a valid diamond shape for ifcvt. bool IfConverter::ValidDiamond( @@ -798,6 +917,9 @@ void IfConverter::AnalyzeBranches(BBInfo &BBI) { BBI.BrCond.clear(); BBI.IsBrAnalyzable = !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + SmallVector RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + BBI.IsBrReversible = (RevCond.size() == 0) || + !TII->ReverseBranchCondition(RevCond); BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr; if (BBI.BrCond.size()) { @@ -1056,34 +1178,56 @@ void IfConverter::AnalyzeBlock( if (CanRevCond) { BBInfo TrueBBICalc, FalseBBICalc; + auto feasibleDiamond = [&]() { + bool MeetsSize = MeetIfcvtSizeLimit( + *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) + + TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2, + *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) + + FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2, + Prediction); + bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond, + /* IsTriangle */ false, /* RevCond */ false, + /* hasCommonTail */ true); + bool FalseFeasible = FeasibilityAnalysis(FalseBBI, RevCond, + /* IsTriangle */ false, /* RevCond */ false, + /* hasCommonTail */ true); + return MeetsSize && TrueFeasible && FalseFeasible; + }; + if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2, - TrueBBICalc, FalseBBICalc) && - MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + - TrueBBICalc.ExtraCost), - TrueBBICalc.ExtraCost2, - *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + - FalseBBICalc.ExtraCost), - FalseBBICalc.ExtraCost2, - Prediction) && - FeasibilityAnalysis(TrueBBI, BBI.BrCond, - /* IsTriangle */ false, /* RevCond */ false, - /* hasCommonTail */ true) && - FeasibilityAnalysis(FalseBBI, RevCond, - /* IsTriangle */ false, /* RevCond */ false, - /* hasCommonTail */ true)) { - // Diamond: - // EBB - // / \_ - // | | - // TBB FBB - // \ / - // TailBB - // Note TailBB can be empty. - Tokens.push_back(llvm::make_unique( - BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2, - (bool) TrueBBICalc.ClobbersPred, - (bool) FalseBBICalc.ClobbersPred)); - Enqueued = true; + TrueBBICalc, FalseBBICalc)) { + if (feasibleDiamond()) { + // Diamond: + // EBB + // / \_ + // | | + // TBB FBB + // \ / + // TailBB + // Note TailBB can be empty. + Tokens.push_back(llvm::make_unique( + BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2, + (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); + Enqueued = true; + } + } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2, + TrueBBICalc, FalseBBICalc)) { + if (feasibleDiamond()) { + // ForkedDiamond: + // if TBB and FBB have a common tail that includes their conditional + // branch instructions, then we can If Convert this pattern. + // EBB + // _/ \_ + // | | + // TBB FBB + // / \ / \ + // FalseBB TrueBB FalseBB + // + Tokens.push_back(llvm::make_unique( + BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2, + (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); + Enqueued = true; + } } } @@ -1528,32 +1672,28 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { return true; } -/// If convert a diamond sub-CFG. -/// \p BBI is the head of the diamond -/// \p NumDups1 - number of shared instructions at the beginning of TrueBBI and -/// FalseBBI -/// \p NumDups2 - number of shared instructions at the end of TrueBBI and -/// FalseBBI -/// \p TClobbersPred - True if the true block clobbers the predicate in the -/// non-shared portion. -/// \p TClobbersPred - True if the false block clobbers the predicate in the -/// non-shared portion. -bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, - unsigned NumDups1, unsigned NumDups2, - bool TClobbersPred, bool FClobbersPred) { - BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; - BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; - MachineBasicBlock *TailBB = TrueBBI.TrueBB; - // True block must fall through or end with an unanalyzable terminator. - if (!TailBB) { - if (blockAlwaysFallThrough(TrueBBI)) - TailBB = FalseBBI.TrueBB; - assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); - } +/// Common code shared between diamond conversions. +/// \p BBI, \p TrueBBI, and \p FalseBBI form the diamond shape. +/// \p NumDups1 - number of shared instructions at the beginning of \p TrueBBI +/// and FalseBBI +/// \p NumDups2 - number of shared instructions at the end of \p TrueBBI +/// and \p FalseBBI +/// \p RemoveTrueBranch - Remove the branch of the true block before predicating +/// Only false for unanalyzable fallthrough cases. +/// \p RemoveFalseBranch - Remove the branch of the false block before +/// predicating Only false for unanalyzable fallthrough +/// cases. +/// \p MergeAddEdges - Add successor edges when merging blocks. Only false for +/// unanalyzable fallthrough +bool IfConverter::IfConvertDiamondCommon( + BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred, + bool RemoveTrueBranch, bool RemoveFalseBranch, + bool MergeAddEdges) { if (TrueBBI.IsDone || FalseBBI.IsDone || - TrueBBI.BB->pred_size() > 1 || - FalseBBI.BB->pred_size() > 1) { + TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) { // Something has changed. It's no longer safe to predicate these blocks. BBI.IsAnalyzed = false; TrueBBI.IsAnalyzed = false; @@ -1587,6 +1727,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, if (DoSwap) { std::swap(BBI1, BBI2); std::swap(Cond1, Cond2); + std::swap(RemoveTrueBranch, RemoveFalseBranch); } // Remove the conditional branch from entry to the blocks. @@ -1639,12 +1780,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1); MBB2.erase(MBB2.begin(), DI2); - // Remove branch from the 'true' block, unless it was not analyzable. - // Non-analyzable branches need to be preserved, since in such cases, - // the CFG structure is not an actual diamond (the join block may not - // be present). - if (BBI1->IsBrAnalyzable) - BBI1->NonPredSize -= TII->RemoveBranch(MBB1); + if (RemoveTrueBranch) + BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); // Remove duplicated instructions. DI1 = MBB1.end(); for (unsigned i = 0; i != NumDups2; ) { @@ -1662,11 +1799,11 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // must be removed. RemoveKills(MBB1.begin(), MBB1.end(), DontKill, *TRI); - // Remove 'false' block branch (unless it was not analyzable), and find - // the last instruction to predicate. - if (BBI2->IsBrAnalyzable) - BBI2->NonPredSize -= TII->RemoveBranch(MBB2); - DI2 = MBB2.end(); + // Remove 'false' block branch, and find the last instruction to predicate. + // Save the debug location. + if (RemoveFalseBranch) + BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); + DI2 = BBI2->BB->end(); while (NumDups2 != 0) { // NumDups2 only counted non-dbg_value instructions, so this won't // run off the head of the list. @@ -1738,8 +1875,74 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, PredicateBlock(*BBI2, DI2, *Cond2); // Merge the true block into the entry of the diamond. - MergeBlocks(BBI, *BBI1, TailBB == nullptr); - MergeBlocks(BBI, *BBI2, TailBB == nullptr); + MergeBlocks(BBI, *BBI1, MergeAddEdges); + MergeBlocks(BBI, *BBI2, MergeAddEdges); + return true; +} + +/// If convert an almost-diamond sub-CFG where the true +/// and false blocks share a common tail. +bool IfConverter::IfConvertForkedDiamond( + BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + + // Save the debug location for later. + DebugLoc dl; + MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator(); + if (TIE != TrueBBI.BB->end()) + dl = TIE->getDebugLoc(); + // Removing branches from both blocks is safe, because we have already + // determined that both blocks have the same branch instructions. The branch + // will be added back at the end, unpredicated. + if (!IfConvertDiamondCommon( + BBI, TrueBBI, FalseBBI, + NumDups1, NumDups2, + TClobbersPred, FClobbersPred, + /* RemoveTrueBranch */ true, /* RemoveFalseBranch */ true, + /* MergeAddEdges */ true)) + return false; + + // Add back the branch. + // Debug location saved above when removing the branch from BBI2 + TII->InsertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB, + TrueBBI.BrCond, dl); + + RemoveExtraEdges(BBI); + + // Update block info. + BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; + InvalidatePreds(*BBI.BB); + + // FIXME: Must maintain LiveIns. + return true; +} + +/// If convert a diamond sub-CFG. +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + MachineBasicBlock *TailBB = TrueBBI.TrueBB; + + // True block must fall through or end with an unanalyzable terminator. + if (!TailBB) { + if (blockAlwaysFallThrough(TrueBBI)) + TailBB = FalseBBI.TrueBB; + assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); + } + + if (!IfConvertDiamondCommon( + BBI, TrueBBI, FalseBBI, + NumDups1, NumDups2, + TrueBBI.ClobbersPred, FalseBBI.ClobbersPred, + /* RemoveTrueBranch */ TrueBBI.IsBrAnalyzable, + /* RemoveFalseBranch */ FalseBBI.IsBrAnalyzable, + /* MergeAddEdges */ TailBB == nullptr)) + return false; // If the if-converted block falls through or unconditionally branches into // the tail block, and the tail block does not have other predecessors, then @@ -1762,7 +1965,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, CanMergeTail = false; else if (NumPreds == 1 && CanMergeTail) { MachineBasicBlock::pred_iterator PI = TailBB->pred_begin(); - if (*PI != &MBB1 && *PI != &MBB2) + if (*PI != TrueBBI.BB && *PI != FalseBBI.BB) CanMergeTail = false; } if (CanMergeTail) { @@ -1778,8 +1981,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // RemoveExtraEdges won't work if the block has an unanalyzable branch, // which can happen here if TailBB is unanalyzable and is merged, so // explicitly remove BBI1 and BBI2 as successors. - BBI.BB->removeSuccessor(&MBB1); - BBI.BB->removeSuccessor(&MBB2, true); + BBI.BB->removeSuccessor(TrueBBI.BB); + BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true); RemoveExtraEdges(BBI); // Update block info. diff --git a/llvm/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll b/llvm/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll new file mode 100644 index 0000000..baa8d87 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll @@ -0,0 +1,36 @@ +; ModuleID = 'bugpoint-reduced-instructions.bc' +; RUN: llc -O2 -o - %s | FileCheck %s +source_filename = "bugpoint-output-9ad75f8.bc" +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define hidden void @_ZN11__sanitizer25MaybeStartBackgroudThreadEv() local_unnamed_addr #0 { +entry: + br i1 undef, label %land.lhs.true, label %if.end + +; CHECK: # %land.lhs.true +; CHECK-NEXT: bclr +; CHECK-NEXT: # %if.end4 +land.lhs.true: ; preds = %entry + br i1 undef, label %return, label %if.end4 + +if.end: ; preds = %entry + br i1 icmp ne (i32 (i8*, i8*, i8* (i8*)*, i8*)* @_ZN11__sanitizer19real_pthread_createEPvS0_PFS0_S0_ES0_, i32 (i8*, i8*, i8* (i8*)*, i8*)* null), label %if.end4, label %return + +if.end4: ; preds = %if.end, %land.lhs.true + %call5 = tail call i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)* nonnull @_ZN11__sanitizer16BackgroundThreadEPv, i8* null) #7 + unreachable + +return: ; preds = %if.end, %land.lhs.true + ret void +} + +declare extern_weak signext i32 @_ZN11__sanitizer19real_pthread_createEPvS0_PFS0_S0_ES0_(i8*, i8*, i8* (i8*)*, i8*) #2 + +declare i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)*, i8*) local_unnamed_addr #2 + +declare hidden void @_ZN11__sanitizer16BackgroundThreadEPv(i8* nocapture readnone) #5 + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { nobuiltin nounwind } diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll index eb48ffb..a78b0c1 100644 --- a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll +++ b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s -; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it |FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it -enable-tail-merge=0 | FileCheck %s define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK-LABEL: t1: ; CHECK: ittt ne @@ -25,9 +26,9 @@ cond_next: define i32 @t2(i32 %a, i32 %b) nounwind { entry: ; CHECK-LABEL: t2: -; CHECK: ite gt -; CHECK: subgt -; CHECK: suble +; CHECK: ite {{gt|le}} +; CHECK-DAG: suble +; CHECK-DAG: subgt %tmp1434 = icmp eq i32 %a, %b ; [#uses=1] br i1 %tmp1434, label %bb17, label %bb.outer @@ -60,6 +61,44 @@ bb17: ; preds = %cond_false, %cond_true, %entry ret i32 %a_addr.026.1 } +define i32 @t2_nomerge(i32 %a, i32 %b) nounwind { +entry: +; CHECK-LABEL: t2_nomerge: +; CHECK-NOT: ite {{gt|le}} +; CHECK-NOT: suble +; CHECK-NOT: subgt + %tmp1434 = icmp eq i32 %a, %b ; [#uses=1] + br i1 %tmp1434, label %bb17, label %bb.outer + +bb.outer: ; preds = %cond_false, %entry + %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; [#uses=5] + %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; [#uses=1] + br label %bb + +bb: ; preds = %cond_true, %bb.outer + %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; [#uses=2] + %tmp. = sub i32 0, %b_addr.021.0.ph ; [#uses=1] + %tmp.40 = mul i32 %indvar, %tmp. ; [#uses=1] + %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; [#uses=6] + %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; [#uses=1] + br i1 %tmp3, label %cond_true, label %cond_false + +cond_true: ; preds = %bb + %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; [#uses=2] + %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; [#uses=1] + %indvar.next = add i32 %indvar, 1 ; [#uses=1] + br i1 %tmp1437, label %bb17, label %bb + +cond_false: ; preds = %bb + %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; [#uses=2] + %tmp14 = icmp eq i32 %b_addr.021.0.ph, %tmp10 ; [#uses=1] + br i1 %tmp14, label %bb17, label %bb.outer + +bb17: ; preds = %cond_false, %cond_true, %entry + %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; [#uses=1] + ret i32 %a_addr.026.1 +} + @x = external global i32* ; [#uses=1] define void @foo(i32 %a) nounwind { -- 2.7.4