From 80f10e4fe5987cda01724ba9663b6d96754e3169 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 23 Mar 2018 18:00:18 +0000 Subject: [PATCH] [Hexagon] Avoid early if-conversion for one sided branches Patch by Anand Kodnani. llvm-svn: 328344 --- llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 31 ++++++---- llvm/test/CodeGen/Hexagon/prof-early-if.ll | 80 ++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/prof-early-if.ll diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 8410dc4..f2ca118 100644 --- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -100,7 +100,7 @@ namespace llvm { } // end namespace llvm static cl::opt EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden, - cl::init(false), cl::desc("Enable branch probability info")); + cl::init(true), cl::desc("Enable branch probability info")); static cl::opt SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion")); static cl::opt SkipExitBranches("eif-no-loop-exit", cl::init(false), @@ -243,7 +243,7 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, // Interested only in conditional branches, no .new, no new-value, etc. // Check the terminators directly, it's easier than handling all responses - // from AnalyzeBranch. + // from analyzeBranch. MachineBasicBlock *TB = nullptr, *FB = nullptr; MachineBasicBlock::const_iterator T1I = B->getFirstTerminator(); if (T1I == B->end()) @@ -336,7 +336,7 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, return true; } -// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that +// KLUDGE: HexagonInstrInfo::analyzeBranch won't work on a block that // contains EH_LABEL. 
bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { for (auto &I : *B) @@ -345,7 +345,7 @@ bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { return false; } -// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize +// KLUDGE: HexagonInstrInfo::analyzeBranch may be unable to recognize // that a block can never fall-through. bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B) const { @@ -495,7 +495,7 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs( unsigned R = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(R)) continue; - if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass) + if (isPredicate(R)) PredDefs++; } } @@ -503,10 +503,21 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs( } bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const { + BranchProbability JumpProb(1, 10); + BranchProbability Prob(9, 10); + if (MBPI && FP.TrueB && !FP.FalseB && + (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) < JumpProb || + MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob)) + return false; + + if (MBPI && !FP.TrueB && FP.FalseB && + (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) < JumpProb || + MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob)) + return false; + if (FP.TrueB && FP.FalseB) { // Do not IfCovert if the branch is one sided. 
if (MBPI) { - BranchProbability Prob(9, 10); if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob) return false; if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob) @@ -1018,11 +1029,7 @@ void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB, HII->removeBranch(*PredB); PredB->removeSuccessor(SuccB); PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end()); - MachineBasicBlock::succ_iterator I, E = SuccB->succ_end(); - for (I = SuccB->succ_begin(); I != E; ++I) - PredB->addSuccessor(*I); - PredB->normalizeSuccProbs(); - replacePhiEdges(SuccB, PredB); + PredB->transferSuccessorsAndUpdatePHIs(SuccB); removeBlock(SuccB); if (!TermOk) PredB->updateTerminator(); @@ -1044,7 +1051,7 @@ void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) { // By now, the split block has only one successor (SB), and SB has only // one predecessor. We can try to merge them. We will need to update ter- - // minators in FP.Split+SB, and that requires working AnalyzeBranch, which + // minators in FP.Split+SB, and that requires working analyzeBranch, which // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends // with an unconditional branch, we won't need to touch the terminators. if (!hasEHLabel(SB) || hasUncondBranch(SB)) diff --git a/llvm/test/CodeGen/Hexagon/prof-early-if.ll b/llvm/test/CodeGen/Hexagon/prof-early-if.ll new file mode 100644 index 0000000..a5215a9 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/prof-early-if.ll @@ -0,0 +1,80 @@ +; RUN: llc -O2 -march=hexagon < %s | FileCheck %s +; Rely on the comments generated by llc. Check that block "b3" (the conditional "if.then" block) was not predicated. 
+; CHECK: b2 +; CHECK-NOT: if{{.*}}memd +; CHECK: b5 + +%s.0 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %s.1], [5 x i32] } +%s.1 = type { i32, i32 } + +@g0 = global i64 0 +@g1 = global i32 0 +@g2 = global i32 0 +@g3 = global i8 0 + +declare i32 @llvm.hexagon.S2.cl0(i32) #0 +declare i32 @llvm.hexagon.S2.setbit.r(i32, i32) #0 +declare i64 @llvm.hexagon.M2.vmpy2s.s0(i32, i32) #0 +declare i64 @llvm.hexagon.M2.vmac2s.s0(i64, i32, i32) #0 +declare i64 @llvm.hexagon.A2.vaddws(i64, i64) #0 +declare i64 @llvm.hexagon.A2.vsubws(i64, i64) #0 +declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) #0 + +define void @f0(i32 %a0, i64* %a1) #1 { +b0: + br label %b1 + +b1: ; preds = %b5, %b0 + %v0 = phi i32 [ 0, %b0 ], [ %v26, %b5 ] + %v1 = phi i32 [ 0, %b0 ], [ %v25, %b5 ] + %v2 = load i32, i32* @g1, align 4 + %v3 = load i32, i32* @g2, align 8 + %v4 = and i32 %v3, %v2 + br label %b2 + +b2: ; preds = %b4, %b1 + %v5 = phi i64 [ %v21, %b4 ], [ 0, %b1 ] + %v6 = phi i64 [ %v22, %b4 ], [ 0, %b1 ] + %v7 = phi i32 [ %v9, %b4 ], [ %v4, %b1 ] + %v8 = tail call i32 @llvm.hexagon.S2.cl0(i32 %v7) + %v9 = tail call i32 @llvm.hexagon.S2.setbit.r(i32 %v7, i32 %v8) + %v10 = getelementptr [10 x %s.0], [10 x %s.0]* inttoptr (i32 -121502345 to [10 x %s.0]*), i32 0, i32 %v1 + %v11 = getelementptr %s.0, %s.0* %v10, i32 0, i32 12, i32 %v8 + %v12 = load i32, i32* %v11, align 4 + %v13 = tail call i64 @llvm.hexagon.M2.vmpy2s.s0(i32 %v12, i32 %v12) + %v14 = getelementptr %s.0, %s.0* %v10, i32 0, i32 13, i32 %v8 + %v15 = load i32, i32* %v14, align 4 + %v16 = tail call i64 @llvm.hexagon.M2.vmac2s.s0(i64 %v13, i32 %v15, i32 %v15) + %v17 = load i8, i8* @g3, align 1 + %v18 = and i8 %v17, 1 + %v19 = icmp eq i8 %v18, 0 + br i1 %v19, label %b3, label %b4, !prof !0 + +b3: ; preds = %b2 + %v20 = tail call i64 
@llvm.hexagon.A2.vaddws(i64 %v5, i64 %v16) + store i64 %v20, i64* %a1, align 8 + br label %b4 + +b4: ; preds = %b3, %b2 + %v21 = phi i64 [ %v20, %b3 ], [ %v5, %b2 ] + %v22 = tail call i64 @llvm.hexagon.A2.vsubws(i64 %v6, i64 %v16) + %v23 = icmp eq i32 %v9, 0 + br i1 %v23, label %b5, label %b2, !prof !1 + +b5: ; preds = %b4 + %v24 = add i32 %v1, 1 + %v25 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %v24, i32 10) #0 + %v26 = add i32 %v0, 1 + %v27 = icmp eq i32 %v26, %a0 + br i1 %v27, label %b6, label %b1, !prof !1 + +b6: ; preds = %b5 + store i64 %v16, i64* @g0, align 8 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + +!0 = !{!"branch_weights", i32 99, i32 1} +!1 = !{!"branch_weights", i32 10, i32 90} -- 2.7.4