From cde00c02e11da2355ec715f571958fa180b17a43 Mon Sep 17 00:00:00 2001 From: Serguei Katkov Date: Thu, 25 Jul 2019 19:31:50 +0000 Subject: [PATCH] [Loop Peeling] Fix idom detection algorithm. We'd like to determine the idom of exit block after peeling one iteration. Let Exit be the exit block. Let ExitingSet be the set of predecessors of Exit block. They are exiting blocks. Let Latch' and ExitingSet' be their copies after peeling. We'd like to find idom'(Exit) - the idom of Exit after peeling. It is evident that idom'(Exit) will be the nearest common dominator of ExitingSet and ExitingSet'. idom(Exit) is the nearest common dominator of ExitingSet. idom(Exit)' is the nearest common dominator of ExitingSet'. Taking into account that we have a single Latch, Latch' will dominate Header and idom(Exit). So idom'(Exit) is the nearest common dominator of idom(Exit)' and Latch'. All these basic blocks are in the same loop, so what we find is (nearest common dominator of idom(Exit) and Latch)'. Reviewers: reames, fhahn Reviewed By: reames Subscribers: hiraditya, zzheng, llvm-commits Differential Revision: https://reviews.llvm.org/D65292 llvm-svn: 367044 --- llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp | 21 +++++++++- .../LoopUnroll/peel-loop-pgo-deopt-idom-2.ll | 46 ++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp index 897a4d5..e78a570 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -575,11 +575,30 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, DenseMap<BasicBlock *, BasicBlock *> ExitIDom; if (DT) { + // We'd like to determine the idom of exit block after peeling one + // iteration. + // Let Exit be the exit block. + // Let ExitingSet be the set of predecessors of Exit block. They are exiting + // blocks. 
+ // Let Latch' and ExitingSet' be their copies after peeling. + // We'd like to find idom'(Exit) - the idom of Exit after peeling. + // It is evident that idom'(Exit) will be the nearest common dominator + // of ExitingSet and ExitingSet'. + // idom(Exit) is the nearest common dominator of ExitingSet. + // idom(Exit)' is the nearest common dominator of ExitingSet'. + // Taking into account that we have a single Latch, Latch' will dominate + // Header and idom(Exit). + // So idom'(Exit) is the nearest common dominator of idom(Exit)' and Latch'. + // All these basic blocks are in the same loop, so what we find is + // (nearest common dominator of idom(Exit) and Latch)'. + // In the loop below we remember nearest common dominator of idom(Exit) and + // Latch to update idom of Exit later. assert(L->hasDedicatedExits() && "No dedicated exits?"); for (auto Edge : ExitEdges) { if (ExitIDom.count(Edge.second)) continue; - BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock(); + BasicBlock *BB = DT->findNearestCommonDominator( + DT->getNode(Edge.second)->getIDom()->getBlock(), Latch); assert(L->contains(BB) && "IDom is not in a loop"); ExitIDom[Edge.second] = BB; } diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll new file mode 100644 index 0000000..17fd5a7 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom-2.ll @@ -0,0 +1,46 @@ +; REQUIRES: asserts +; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime -unroll-peel-multi-deopt-exit 2>&1 | FileCheck %s +; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require<profile-summary>,function(require<opt-remark-emit>,unroll)' 2>&1 | FileCheck %s + +; Regression test for setting the correct idom for exit blocks. + +; CHECK: Loop Unroll: F[basic] +; CHECK: PEELING loop %for.body with iteration count 1! 
+ +define i32 @basic(i32* %p, i32 %k, i1 %c1, i1 %c2) #0 !prof !3 { +entry: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %entry ], [ %inc, %latch ] + %p.addr.04 = phi i32* [ %p, %entry ], [ %incdec.ptr, %latch ] + %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1 + store i32 %i.05, i32* %p.addr.04, align 4 + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %c1, label %left, label %right + +left: + br label %latch + +right: + br i1 %c1, label %latch, label %side_exit, !prof !2 + +latch: + br i1 %cmp, label %for.body, label %for.end, !prof !1 + +for.end: + ret i32 %inc + +side_exit: + %rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %inc) ] + ret i32 %rval +} + +declare i32 @llvm.experimental.deoptimize.i32(...) + +attributes #0 = { nounwind } + +!1 = !{!"branch_weights", i32 1, i32 1} +!2 = !{!"branch_weights", i32 1, i32 0} +!3 = !{!"function_entry_count", i64 1} -- 2.7.4