From 96a19d35aea91a6b96d039bae1d604a41ee7ad55 Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Fri, 15 Jul 2016 18:10:49 +0000 Subject: [PATCH] [PGO] IRPGO pre-cleanup pass changes This patch adds a selected set of cleanup passes including a pre-inline pass before LLVM IR PGO instrumentation. The inline is only intended to apply those obvious/trivial ones before instrumentation so that much less instrumentation is needed to get better profiling information. This will drastically improve the instrumented code performance for large C++ applications. Another benefit is the context sensitive counts that can potentially improve the PGO optimization. Differential Revision: http://reviews.llvm.org/D21405 llvm-svn: 275588 --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 22 ++++++++++++++++++++++ llvm/test/Transforms/PGOProfile/preinline.ll | 22 ++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 llvm/test/Transforms/PGOProfile/preinline.ll diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index c6feb13..537919d 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -125,6 +125,15 @@ static cl::opt UseLoopVersioningLICM( "enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass")); +static cl::opt + DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, + cl::desc("Disable pre-instrumentation inliner")); + +static cl::opt PreInlineThreshold( + "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore, + cl::desc("Control the amount of inlining in pre-instrumentation inliner " + "(default = 75)")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -229,6 +238,19 @@ void PassManagerBuilder::populateFunctionPassManager( // Do PGO instrumentation generation or use pass as the option specified. 
void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { + if (PGOInstrGen.empty() && PGOInstrUse.empty()) + return; + // Perform the preinline and cleanup passes for O1 and above. + // And avoid doing them if optimizing for size. + if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) { + // Create preinline pass. + MPM.add(createFunctionInliningPass(PreInlineThreshold)); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Combine silly seq's + addExtensionsToPM(EP_Peephole, MPM); + } if (!PGOInstrGen.empty()) { MPM.add(createPGOInstrumentationGenLegacyPass()); // Add the profile lowering pass. diff --git a/llvm/test/Transforms/PGOProfile/preinline.ll b/llvm/test/Transforms/PGOProfile/preinline.ll new file mode 100644 index 0000000..2618666 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/preinline.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -O2 -profile-generate=default.profraw -S | FileCheck %s --check-prefix=GEN +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo(i32 %i) { +entry: +; GEN: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo +; GEN-NOT: %pgocount.i = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__bar + %call = call i32 @bar() + %add = add nsw i32 %i, %call + ret i32 %add +} + +define internal i32 @bar() { +; check that bar is inlined into foo and eliminated from IR. +; GEN-NOT: define internal i32 @bar +entry: + %call = call i32 (...) @bar1() + ret i32 %call +} + +declare i32 @bar1(...) -- 2.7.4