From 947dbe12543116216b364deebb5b6ea204db3357 Mon Sep 17 00:00:00 2001 From: Dehao Chen Date: Wed, 9 Nov 2016 00:58:19 +0000 Subject: [PATCH] Enable Loop Sink pass for functions that have a profile. Summary: For functions with profile data, we are confident that loop sink will be optimal in sinking code. Reviewers: davidxl, hfinkel Subscribers: mehdi_amini, mzolotukhin, llvm-commits Differential Revision: https://reviews.llvm.org/D26155 llvm-svn: 286325 --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 8 ++++---- llvm/lib/Transforms/Scalar/LoopSink.cpp | 5 +++++ llvm/test/Other/pass-pipelines.ll | 3 +++ llvm/test/Transforms/LICM/loopsink.ll | 11 ++++++----- llvm/test/Transforms/LICM/sink.ll | 3 ++- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index aae0134..c340da4 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -626,10 +626,7 @@ void PassManagerBuilder::populateModulePassManager( // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. MPM.add(createLICMPass()); - - // Get rid of LCSSA nodes. - MPM.add(createInstructionSimplifierPass()); - } + } // After vectorization and unrolling, assume intrinsics may tell us more // about pointer alignments. @@ -650,6 +647,9 @@ void PassManagerBuilder::populateModulePassManager( if (MergeFunctions) MPM.add(createMergeFunctionsPass()); + MPM.add(createLoopSinkPass()); + // Get rid of LCSSA nodes. 
+ MPM.add(createInstructionSimplifierPass()); addExtensionsToPM(EP_OptimizerLast, MPM); } diff --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp index 85249b8..90309d7 100644 --- a/llvm/lib/Transforms/Scalar/LoopSink.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp @@ -243,6 +243,11 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, if (!Preheader) return false; + // Enable LoopSink only when runtime profile is available. + // With static profile, the sinking decision may be sub-optimal. + if (!Preheader->getParent()->getEntryCount()) + return false; + const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader); // If there are no basic blocks with lower frequency than the preheader then // we can avoid the detailed analysis as we will never find profitable sinking diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll index 5da31cc..196f9bf 100644 --- a/llvm/test/Other/pass-pipelines.ll +++ b/llvm/test/Other/pass-pipelines.ll @@ -82,6 +82,9 @@ ; the runtime unrolling though. 
; CHECK-O2: Loop Pass Manager ; CHECK-O2-NEXT: Loop Invariant Code Motion +; CHECK-O2: FunctionPass Manager +; CHECK-O2: Loop Pass Manager +; CHECK-O2-NEXT: Loop Sink ; CHECK-O2-NOT: Manager ; ; FIXME: There really shouldn't be another pass manager, especially one that diff --git a/llvm/test/Transforms/LICM/loopsink.ll b/llvm/test/Transforms/LICM/loopsink.ll index 564a35b..5004752 100644 --- a/llvm/test/Transforms/LICM/loopsink.ll +++ b/llvm/test/Transforms/LICM/loopsink.ll @@ -21,7 +21,7 @@ ; CHECK: load i32, i32* @g ; CHECK: .b3: ; CHECK-NOT: load i32, i32* @g -define i32 @t1(i32, i32) #0 { +define i32 @t1(i32, i32) #0 !prof !0 { %3 = icmp eq i32 %1, 0 br i1 %3, label %.exit, label %.preheader @@ -88,7 +88,7 @@ define i32 @t1(i32, i32) #0 { ; CHECK: .b6: ; CHECK: load i32, i32* @g ; CHECK: .b7: -define i32 @t2(i32, i32) #0 { +define i32 @t2(i32, i32) #0 !prof !0 { %3 = icmp eq i32 %1, 0 br i1 %3, label %.exit, label %.preheader @@ -150,7 +150,7 @@ define i32 @t2(i32, i32) #0 { ; CHECK: load i32, i32* @g ; CHECK: .b1: ; CHECK-NOT: load i32, i32* @g -define i32 @t3(i32, i32) #0 { +define i32 @t3(i32, i32) #0 !prof !0 { %3 = icmp eq i32 %1, 0 br i1 %3, label %.exit, label %.preheader @@ -201,7 +201,7 @@ define i32 @t3(i32, i32) #0 { ; CHECK: .b1: ; CHECK: load i32, i32* @g ; CHECK: .exit: -define i32 @t4(i32, i32) #0 { +define i32 @t4(i32, i32) #0 !prof !0 { .preheader: %invariant = load i32, i32* @g br label %.b1 @@ -235,7 +235,7 @@ define i32 @t4(i32, i32) #0 { ; CHECK: load i32, i32* @g ; CHECK: .b1: ; CHECK-NOT: load i32, i32* @g -define i32 @t5(i32, i32*) #0 { +define i32 @t5(i32, i32*) #0 !prof !0 { %3 = icmp eq i32 %0, 0 br i1 %3, label %.exit, label %.preheader @@ -281,6 +281,7 @@ define i32 @t5(i32, i32*) #0 { declare i32 @foo() +!0 = !{!"function_entry_count", i64 1} !1 = !{!"branch_weights", i32 1, i32 2000} !2 = !{!"branch_weights", i32 2000, i32 1} !3 = !{!"branch_weights", i32 100, i32 1} diff --git a/llvm/test/Transforms/LICM/sink.ll 
b/llvm/test/Transforms/LICM/sink.ll index 0eceb3d..cf169dd 100644 --- a/llvm/test/Transforms/LICM/sink.ll +++ b/llvm/test/Transforms/LICM/sink.ll @@ -15,7 +15,7 @@ @g = global i32 0, align 4 -define i32 @foo(i32, i32) #0 { +define i32 @foo(i32, i32) #0 !prof !2 { %3 = icmp eq i32 %1, 0 br i1 %3, label %._crit_edge, label %.lr.ph.preheader @@ -58,3 +58,4 @@ define i32 @foo(i32, i32) #0 { } !1 = !{!"branch_weights", i32 1, i32 2000} +!2 = !{!"function_entry_count", i64 1} -- 2.7.4