From b1bd398ceb5bead0251ec0f40ba5db0177e4a2df Mon Sep 17 00:00:00 2001 From: Easwaran Raman Date: Tue, 8 Mar 2016 00:36:35 +0000 Subject: [PATCH] Revert revisions 262636, 262643, 262679, and 262682. llvm-svn: 262883 --- llvm/include/llvm/Analysis/InlineCost.h | 25 +---- llvm/include/llvm/Transforms/IPO/InlinerPass.h | 29 ----- llvm/include/llvm/Transforms/Utils/Cloning.h | 22 +--- llvm/lib/Analysis/InlineCost.cpp | 102 +++--------------- llvm/lib/Transforms/IPO/InlineSimple.cpp | 3 +- llvm/lib/Transforms/IPO/Inliner.cpp | 120 ++------------------- llvm/lib/Transforms/Utils/CloneFunction.cpp | 42 ++++---- llvm/lib/Transforms/Utils/InlineFunction.cpp | 9 +- .../Transforms/Inline/function-count-update-2.ll | 27 ----- .../Transforms/Inline/function-count-update-3.ll | 69 ------------ .../Transforms/Inline/function-count-update.ll | 51 --------- 11 files changed, 53 insertions(+), 446 deletions(-) delete mode 100644 llvm/test/Transforms/Inline/function-count-update-2.ll delete mode 100644 llvm/test/Transforms/Inline/function-count-update-3.ll delete mode 100644 llvm/test/Transforms/Inline/function-count-update.ll diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index 4f0321e..6f457bc 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -20,7 +20,6 @@ namespace llvm { class AssumptionCacheTracker; -class BlockFrequencyInfo; class CallSite; class DataLayout; class Function; @@ -39,21 +38,6 @@ namespace InlineConstants { const unsigned TotalAllocaSizeRecursiveCaller = 1024; } -/// \brief Block frequency analysis for multiple functions. -/// This class mimics block frequency analysis on CGSCC level. Block frequency -/// info is computed on demand and cached unless they are invalidated. -class BlockFrequencyAnalysis { -private: - DenseMap BFM; - -public: - ~BlockFrequencyAnalysis(); - /// \brief Returns BlockFrequencyInfo for a function. - BlockFrequencyInfo *getBlockFrequencyInfo(Function *); - /// \brief Invalidates block frequency info for a function. - void invalidateBlockFrequencyInfo(Function *); -}; - /// \brief Represents the cost of inlining a function. /// /// This supports special values for functions which should "always" or @@ -127,8 +111,7 @@ public: /// inlining the callsite. It is an expensive, heavyweight call. InlineCost getInlineCost(CallSite CS, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT, - BlockFrequencyAnalysis *BFA); + AssumptionCacheTracker *ACT); /// \brief Get an InlineCost with the callee explicitly specified. /// This allows you to calculate the cost of inlining a function via a @@ -137,8 +120,7 @@ InlineCost getInlineCost(CallSite CS, int DefaultThreshold, // InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT, - BlockFrequencyAnalysis *BFA); + AssumptionCacheTracker *ACT); int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel); @@ -147,9 +129,6 @@ int getDefaultInlineThreshold(); /// \brief Minimal filter to detect invalid constructs for inlining. bool isInlineViable(Function &Callee); - -/// \brief Return estimated count of the block \p BB. -Optional getBlockCount(BasicBlock *BB, BlockFrequencyAnalysis *BFA); } #endif diff --git a/llvm/include/llvm/Transforms/IPO/InlinerPass.h b/llvm/include/llvm/Transforms/IPO/InlinerPass.h index d81c993..40766c6 100644 --- a/llvm/include/llvm/Transforms/IPO/InlinerPass.h +++ b/llvm/include/llvm/Transforms/IPO/InlinerPass.h @@ -24,18 +24,8 @@ class AssumptionCacheTracker; class CallSite; class DataLayout; class InlineCost; -class BlockFrequencyAnalysis; template class SmallPtrSet; -// Functor invoked when a block is cloned during inlining. -typedef std::function - BlockCloningFunctor; -// Functor invoked when a function is inlined inside the basic block -// containing the call. -typedef std::function FunctionCloningFunctor; -// Functor invoked when a function gets deleted during inlining. -typedef std::function FunctionDeletedFunctor; - /// Inliner - This class contains all of the helper code which is used to /// perform the inlining operations that do not depend on the policy. /// @@ -79,28 +69,9 @@ private: /// shouldInline - Return true if the inliner should attempt to /// inline at the given CallSite. bool shouldInline(CallSite CS); - /// Set the BFI of \p Dst to be the same as \p Src. - void copyBlockFrequency(BasicBlock *Src, BasicBlock *Dst); - /// Invalidates BFI for function \p F. - void invalidateBFI(Function *F); - /// Invalidates BFI for all functions in \p SCC. - void invalidateBFI(CallGraphSCC &SCC); - /// Update function entry count for \p Callee which has been inlined into - /// \p CallBB. - void updateEntryCount(BasicBlock *CallBB, Function *Callee); - /// \brief Update block frequency of an inlined block. - /// This method updates the block frequency of \p NewBB which is a clone of - /// \p OrigBB when the callsite \p CS gets inlined. The frequency of \p NewBB - /// is computed as follows: - /// Freq(NewBB) = Freq(OrigBB) * CallSiteFreq / CalleeEntryFreq. - void updateBlockFreq(CallSite &CS, const BasicBlock *OrigBB, - const BasicBlock *NewBB); protected: AssumptionCacheTracker *ACT; - std::unique_ptr BFA; - /// Are we using profile guided optimization? - bool HasProfileData; }; } // End llvm namespace diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index b934a56..0bae2bd 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -48,9 +48,6 @@ class AllocaInst; class AssumptionCacheTracker; class DominatorTree; -typedef std::function - BlockCloningFunctor; - /// Return an exact copy of the specified module /// std::unique_ptr CloneModule(const Module *M); @@ -160,8 +157,7 @@ void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl &Returns, const char *NameSuffix = "", - ClonedCodeInfo *CodeInfo = nullptr, - BlockCloningFunctor Ftor = nullptr); + ClonedCodeInfo *CodeInfo = nullptr); /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The @@ -176,31 +172,23 @@ void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, /// void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, - SmallVectorImpl &Returns, + SmallVectorImpl &Returns, const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = nullptr, - Instruction *TheCall = nullptr, - BlockCloningFunctor Ftor = nullptr); + Instruction *TheCall = nullptr); /// InlineFunctionInfo - This class captures the data input to the /// InlineFunction call, and records the auxiliary results produced by it. class InlineFunctionInfo { public: explicit InlineFunctionInfo(CallGraph *cg = nullptr, - AssumptionCacheTracker *ACT = nullptr, - BlockCloningFunctor Ftor = nullptr) - : CG(cg), ACT(ACT), Ftor(Ftor), CallSuccessorBlockDeleted(false) {} + AssumptionCacheTracker *ACT = nullptr) + : CG(cg), ACT(ACT) {} /// CG - If non-null, InlineFunction will update the callgraph to reflect the /// changes it makes. CallGraph *CG; AssumptionCacheTracker *ACT; - // Functor that is invoked when a block is cloned into the new function. - BlockCloningFunctor Ftor; - - /// CallSuccessorBlockDeleted - whether the block immediately following the - /// call has been deleted during inlining - bool CallSuccessorBlockDeleted; /// StaticAllocas - InlineFunction fills this in with all static allocas that /// get copied into the caller. diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 355c326..b4138b2 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -18,18 +18,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BlockFrequencyInfoImpl.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/InstVisitor.h" @@ -90,7 +85,6 @@ class CallAnalyzer : public InstVisitor { // easily cacheable. Instead, use the cover function paramHasAttr. CallSite CandidateCS; - BlockFrequencyAnalysis *BFA; int Threshold; int Cost; @@ -159,8 +153,6 @@ class CallAnalyzer : public InstVisitor { /// passed to support analyzing indirect calls whose target is inferred by /// analysis. void updateThreshold(CallSite CS, Function &Callee); - /// Adjust Threshold based on CallSiteCount and return the adjusted threshold. - int getAdjustedThreshold(int Threshold, Optional CallSiteCount); // Custom analysis routines. bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl &EphValues); @@ -202,19 +194,17 @@ class CallAnalyzer : public InstVisitor { public: CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT, - Function &Callee, int Threshold, CallSite CSArg, - BlockFrequencyAnalysis *BFA) - : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), BFA(BFA), - Threshold(Threshold), Cost(0), IsCallerRecursive(false), - IsRecursiveCall(false), ExposesReturnsTwice(false), - HasDynamicAlloca(false), ContainsNoDuplicateCall(false), - HasReturn(false), HasIndirectBr(false), HasFrameEscape(false), - AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), - SROACostSavingsLost(0) {} + Function &Callee, int Threshold, CallSite CSArg) + : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold), + Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), + ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), + HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -582,15 +572,6 @@ bool CallAnalyzer::isKnownNonNullInCallee(Value *V) { return false; } -// Adjust the threshold based on callsite hotness. Currently this is a nop. -int CallAnalyzer::getAdjustedThreshold(int Threshold, - Optional CallSiteCount - LLVM_ATTRIBUTE_UNUSED) { - // FIXME: The new threshold should be computed from the given Threshold and - // the callsite hotness. - return Threshold; -} - void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { // If -inline-threshold is not given, listen to the optsize and minsize // attributes when they would decrease the threshold. @@ -615,9 +596,6 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { FunctionCount = Callee.getEntryCount().getValue(); MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue(); } - Optional CallSiteCount = - llvm::getBlockCount(CS.getInstruction()->getParent(), BFA); - Threshold = getAdjustedThreshold(Threshold, CallSiteCount); // Listen to the inlinehint attribute or profile based hotness information // when it would increase the threshold and the caller does not need to @@ -934,8 +912,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS, - BFA); + CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. @@ -1456,10 +1433,9 @@ static bool functionsHaveCompatibleAttributes(Function *Caller, InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT, - BlockFrequencyAnalysis *BFA) { + AssumptionCacheTracker *ACT) { return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI, - ACT, BFA); + ACT); } int llvm::computeThresholdFromOptLevels(unsigned OptLevel, @@ -1478,8 +1454,7 @@ int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; } InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold, TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT, - BlockFrequencyAnalysis *BFA) { + AssumptionCacheTracker *ACT) { // Cannot inline indirect calls. if (!Callee) @@ -1512,7 +1487,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS, BFA); + CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1560,48 +1535,3 @@ bool llvm::isInlineViable(Function &F) { return true; } - -/// \brief Get estimated execution count for \p BB. -Optional llvm::getBlockCount(BasicBlock *BB, - BlockFrequencyAnalysis *BFA) { - if (!BFA) - return None; - Function *F = BB->getParent(); - Optional EntryCount = F->getEntryCount(); - if (!EntryCount) - return None; - BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(F); - uint64_t BBFreq = BFI->getBlockFreq(BB).getFrequency(); - uint64_t FunctionEntryFreq = BFI->getEntryFreq(); - uint64_t BBCount = EntryCount.getValue() * BBFreq / FunctionEntryFreq; - return BBCount; -} - -BlockFrequencyAnalysis::~BlockFrequencyAnalysis() { - for (auto &Entry : BFM) { - delete Entry.second; - } -} - -/// \brief Get BlockFrequencyInfo for a function. -BlockFrequencyInfo *BlockFrequencyAnalysis::getBlockFrequencyInfo(Function *F) { - auto Iter = BFM.find(F); - if (Iter != BFM.end()) - return Iter->second; - // We need to create a BlockFrequencyInfo object for F and store it. - DominatorTree DT; - DT.recalculate(*F); - LoopInfo LI(DT); - BranchProbabilityInfo BPI(*F, LI); - BlockFrequencyInfo *BFI = new BlockFrequencyInfo(*F, BPI, LI); - BFM[F] = BFI; - return BFI; -} - -/// \brief Invalidate BlockFrequencyInfo for a function. -void BlockFrequencyAnalysis::invalidateBlockFrequencyInfo(Function *F) { - if (BFM.count(F)) { - delete BFM[F]; - BFM.erase(F); - } -} diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp index cc37c97..a87c0d3 100644 --- a/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -59,8 +59,7 @@ public: InlineCost getInlineCost(CallSite CS) override { Function *Callee = CS.getCalledFunction(); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); - return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT, - HasProfileData ? BFA.get() : nullptr); + return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT); } bool runOnSCC(CallGraphSCC &SCC) override; diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index c82c7df..568707d 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -19,7 +19,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -48,13 +47,10 @@ STATISTIC(NumMergedAllocas, "Number of allocas merged together"); // if those would be more profitable and blocked inline steps. STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); -Inliner::Inliner(char &ID) - : CallGraphSCCPass(ID), InsertLifetime(true), - BFA(new BlockFrequencyAnalysis()) {} +Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {} Inliner::Inliner(char &ID, bool InsertLifetime) - : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime), - BFA(new BlockFrequencyAnalysis()) {} + : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} /// For this class, we declare that we require and preserve the call graph. /// If the derived class implements this method, it should @@ -263,7 +259,7 @@ bool Inliner::shouldInline(CallSite CS) { Twine(IC.getCostDelta() + IC.getCost()) + ")"); return false; } - + // Try to detect the case where the current inlining candidate caller (call // it B) is a static or linkonce-ODR function and is an inlining candidate // elsewhere, and the current candidate callee (call it C) is large enough @@ -360,90 +356,8 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, return false; } -/// \brief Update the frequency of a block that is cloned into the caller. -/// This is invoked when \p OrigBB from the callee is cloned into \p NewBB in -/// the caller. -void Inliner::updateBlockFreq(CallSite &CS, const BasicBlock *OrigBB, - const BasicBlock *NewBB) { - if (!HasProfileData) - return; - Instruction *Call = CS.getInstruction(); - BasicBlock *CallBB = Call->getParent(); - BlockFrequencyInfo *CalleeBFI = - BFA->getBlockFrequencyInfo(CS.getCalledFunction()); - BlockFrequencyInfo *CallerBFI = - BFA->getBlockFrequencyInfo(CallBB->getParent()); - // Find the number of times OrigBB is executed per invocation of the callee - // and multiply by the number of times callee is executed in the caller. - // Freq(NewBB) = Freq(OrigBB) * CallSiteFreq / CalleeEntryFreq. - uint64_t CallSiteFreq = CallerBFI->getBlockFreq(CallBB).getFrequency(); - uint64_t CalleeEntryFreq = CalleeBFI->getEntryFreq(); - // Frequency of OrigBB in the callee. - BlockFrequency OrigBBFreq = CalleeBFI->getBlockFreq(OrigBB); - CallerBFI->setBlockFreq(NewBB, (double)(OrigBBFreq.getFrequency()) / - CalleeEntryFreq * CallSiteFreq); -} - -/// \brief Update entry count of \p Callee after it got inlined at a callsite -/// in block \p CallBB. -void Inliner::updateEntryCount(BasicBlock *CallBB, Function *Callee) { - if (!HasProfileData) - return; - // If the callee has a original count of N, and the estimated count of - // callsite is M, the new callee count is set to N - M. M is estimated from - // the caller's entry count, its entry block frequency and the block frequency - // of the callsite. - Optional CalleeCount = Callee->getEntryCount(); - if (!CalleeCount) - return; - Optional CallSiteCount = llvm::getBlockCount(CallBB, BFA.get()); - if (!CallSiteCount) - return; - // Since CallSiteCount is an estimate, it could exceed the original callee - // count and has to be set to 0. - if (CallSiteCount.getValue() > CalleeCount.getValue()) { - Callee->setEntryCount(0); - DEBUG(llvm::dbgs() << "Estimated count of block " << CallBB->getName() - << " is " << CallSiteCount.getValue() - << " which exceeds the entry count " - << CalleeCount.getValue() << " of the callee " - << Callee->getName() << "\n"); - } else - Callee->setEntryCount(CalleeCount.getValue() - CallSiteCount.getValue()); -} - -void Inliner::invalidateBFI(Function *F) { - if (!HasProfileData) - return; - if (F) - BFA->invalidateBlockFrequencyInfo(F); -} -void Inliner::invalidateBFI(CallGraphSCC &SCC) { - if (!HasProfileData) - return; - for (CallGraphNode *Node : SCC) { - Function *F = Node->getFunction(); - invalidateBFI(F); - } -} -void Inliner::copyBlockFrequency(BasicBlock *Src, BasicBlock *Dst) { - if (!HasProfileData) - return; - Function *F = Src->getParent(); - BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(F); - BFI->setBlockFreq(Dst, BFI->getBlockFreq(Src).getFrequency()); -} - -static bool hasProfileData(Module &M) { - // We check for the presence of MaxFunctionCount in the module. - // FIXME: This now only works for frontend based instrumentation. - return M.getMaximumFunctionCount().hasValue(); -} - bool Inliner::runOnSCC(CallGraphSCC &SCC) { - using namespace std::placeholders; CallGraph &CG = getAnalysis().getCallGraph(); - HasProfileData = hasProfileData(CG.getModule()); ACT = &getAnalysis(); auto &TLI = getAnalysis().getTLI(); @@ -505,6 +419,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlinedArrayAllocasTy InlinedArrayAllocas; + InlineFunctionInfo InlineInfo(&CG, ACT); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -533,10 +448,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { CS.getInstruction()->eraseFromParent(); ++NumCallsDeleted; } else { - Instruction *TheCall = CS.getInstruction(); - BasicBlock *CallSiteBlock = TheCall->getParent(); - Instruction *CallSuccessor = &*(++BasicBlock::iterator(TheCall)); - // We can only inline direct calls to non-declarations. if (!Callee || Callee->isDeclaration()) continue; @@ -565,11 +476,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { continue; } - BlockCloningFunctor BCF = nullptr; - if (HasProfileData) - BCF = std::bind(&Inliner::updateBlockFreq, this, CS, _1, _2); - InlineFunctionInfo InlineInfo(&CG, ACT, BCF); - // Attempt to inline the function. if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID, InsertLifetime)) { @@ -579,15 +485,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { Caller->getName())); continue; } - updateEntryCount(CallSiteBlock, Callee); - if (!InlineInfo.CallSuccessorBlockDeleted) { - // The instruction following the call is part of a new basic block - // created during the inlining process. This does not have an entry in - // the BFI. We create an entry by copying the frequency of the - // original block containing the call. - copyBlockFrequency(CallSiteBlock, CallSuccessor->getParent()); - } - ++NumInlined; // Report the inline decision. @@ -626,9 +523,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { CalleeNode->removeAllCalledFunctions(); // Removing the node for callee from the call graph and delete it. - Function *F = CG.removeFunctionFromModule(CalleeNode); - invalidateBFI(F); - delete F; + delete CG.removeFunctionFromModule(CalleeNode); ++NumDeleted; } @@ -649,7 +544,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { } } while (LocalChange); - invalidateBFI(SCC); return Changed; } @@ -757,9 +651,7 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { FunctionsToRemove.end()), FunctionsToRemove.end()); for (CallGraphNode *CGN : FunctionsToRemove) { - Function *F = CG.removeFunctionFromModule(CGN); - invalidateBFI(F); - delete F; + delete CG.removeFunctionFromModule(CGN); ++NumDeleted; } return true; diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index ac0867a..05b0a17 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -277,10 +277,9 @@ namespace { /// The specified block is found to be reachable, clone it and /// anything that it can reach. - void CloneBlock(const BasicBlock *BB, + void CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, - std::vector &ToClone, - BlockCloningFunctor Ftor = nullptr); + std::vector &ToClone); }; } @@ -288,8 +287,7 @@ namespace { /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, - std::vector &ToClone, - BlockCloningFunctor Ftor) { + std::vector &ToClone){ WeakVH &BBEntry = VMap[BB]; // Have we already cloned this block? @@ -426,19 +424,18 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && BB != &BB->getParent()->front(); } - // Call Ftor to tell BB has been cloned to NewBB - if (Ftor) - Ftor(BB, NewBB); } /// This works like CloneAndPruneFunctionInto, except that it does not clone the /// entire function. Instead it starts at an instruction provided by the caller /// and copies (and prunes) only the code reachable from that instruction. -void llvm::CloneAndPruneIntoFromInst( - Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, - ValueToValueMapTy &VMap, bool ModuleLevelChanges, - SmallVectorImpl &Returns, const char *NameSuffix, - ClonedCodeInfo *CodeInfo, BlockCloningFunctor Ftor) { +void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, + const Instruction *StartingInst, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl &Returns, + const char *NameSuffix, + ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); ValueMapTypeRemapper *TypeMapper = nullptr; @@ -464,11 +461,11 @@ void llvm::CloneAndPruneIntoFromInst( // Clone the entry block, and anything recursively reachable from it. std::vector CloneWorklist; - PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist, Ftor); + PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); - PFC.CloneBlock(BB, BB->begin(), CloneWorklist, Ftor); + PFC.CloneBlock(BB, BB->begin(), CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was @@ -670,14 +667,15 @@ void llvm::CloneAndPruneIntoFromInst( /// constant arguments cause a significant amount of code in the callee to be /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. -void llvm::CloneAndPruneFunctionInto( - Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, - bool ModuleLevelChanges, SmallVectorImpl &Returns, - const char *NameSuffix, ClonedCodeInfo *CodeInfo, Instruction *TheCall, - BlockCloningFunctor Ftor) { +void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl &Returns, + const char *NameSuffix, + ClonedCodeInfo *CodeInfo, + Instruction *TheCall) { CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, - ModuleLevelChanges, Returns, NameSuffix, CodeInfo, - Ftor); + ModuleLevelChanges, Returns, NameSuffix, CodeInfo); } /// \brief Remaps instructions in \p Blocks using the mapping in \p VMap. diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 251afb5..491b18e 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1319,7 +1319,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // If IFI has any state in it, zap it before we fill it in. IFI.reset(); - + const Function *CalledFunc = CS.getCalledFunction(); if (!CalledFunc || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! @@ -1486,7 +1486,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, TheCall, IFI.Ftor); + &InlinedFunctionInfo, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; @@ -1994,11 +1994,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // If we inlined any musttail calls and the original return is now // unreachable, delete it. It can only contain a bitcast and ret. - if (InlinedMustTailCalls && - pred_begin(AfterCallBB) == pred_end(AfterCallBB)) { - IFI.CallSuccessorBlockDeleted = true; + if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB)) AfterCallBB->eraseFromParent(); - } // We should always be able to fold the entry block of the function into the // single predecessor of the block... diff --git a/llvm/test/Transforms/Inline/function-count-update-2.ll b/llvm/test/Transforms/Inline/function-count-update-2.ll deleted file mode 100644 index e9a4459..0000000 --- a/llvm/test/Transforms/Inline/function-count-update-2.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: opt < %s -inline -S | FileCheck %s - -; This tests that the function count of a callee gets correctly updated after it -; has been inlined into a two callsites. - -; CHECK: @callee() !prof [[COUNT:![0-9]+]] -define i32 @callee() !prof !1 { - ret i32 0 -} - -define i32 @caller1() !prof !2 { - %i = call i32 @callee() - ret i32 %i -} - -define i32 @caller2() !prof !3 { - %i = call i32 @callee() - ret i32 %i -} - -!llvm.module.flags = !{!0} -; CHECK: [[COUNT]] = !{!"function_entry_count", i64 0} -!0 = !{i32 1, !"MaxFunctionCount", i32 1000} -!1 = !{!"function_entry_count", i64 1000} -!2 = !{!"function_entry_count", i64 600} -!3 = !{!"function_entry_count", i64 400} - diff --git a/llvm/test/Transforms/Inline/function-count-update-3.ll b/llvm/test/Transforms/Inline/function-count-update-3.ll deleted file mode 100644 index e7a4d0f..0000000 --- a/llvm/test/Transforms/Inline/function-count-update-3.ll +++ /dev/null @@ -1,69 +0,0 @@ -; RUN: opt < %s -inline -S -inline-threshold=50 | FileCheck %s - -; This tests that the function count of a function gets properly scaled after -; inlining a call chain leading to the function. -; Function a calls c with count 200 (C1) -; Function b calls c with count 300 -; Function c calls e with count 250 (C2) -; Entry count of e is 500 (C3) -; c->e inlining does not happen since the cost exceeds threshold. -; c then inlined into a. -; e now gets inlined into a (through c) since the branch condition in e is now -; known and hence the cost gets reduced. -; Estimated count of a->e callsite = C2 * (C1 / C3) -; Estimated count of a->e callsite = 250 * (200 / 500) = 100 -; Remaining count of e = C3 - 100 = 500 - 100 = 400 - -@data = external global i32 - -define i32 @a(i32 %a1) !prof !1 { - %a2 = call i32 @c(i32 %a1, i32 1) - ret i32 %a2 -} - -define i32 @b(i32 %b1) !prof !2 { - %b2 = call i32 @c(i32 %b1, i32 %b1) - ret i32 %b2 -} - -define i32 @c(i32 %c1, i32 %c100) !prof !3 { - %cond = icmp sle i32 %c1, 1 - br i1 %cond, label %cond_true, label %cond_false - -cond_false: - ret i32 0 - -cond_true: - %c11 = call i32 @e(i32 %c100) - ret i32 %c11 -} - -; CHECK: @e(i32 %c1) !prof [[COUNT:![0-9]+]] -define i32 @e(i32 %c1) !prof !4 { - %cond = icmp sle i32 %c1, 1 - br i1 %cond, label %cond_true, label %cond_false - -cond_false: - %c2 = load i32, i32* @data, align 4 - %c3 = add i32 %c1, %c2 - %c4 = mul i32 %c3, %c2 - %c5 = add i32 %c4, %c2 - %c6 = mul i32 %c5, %c2 - %c7 = add i32 %c6, %c2 - %c8 = mul i32 %c7, %c2 - %c9 = add i32 %c8, %c2 - %c10 = mul i32 %c9, %c2 - ret i32 %c10 - -cond_true: - ret i32 0 -} - -!llvm.module.flags = !{!0} -; CHECK: [[COUNT]] = !{!"function_entry_count", i64 400} -!0 = !{i32 1, !"MaxFunctionCount", i32 5000} -!1 = !{!"function_entry_count", i64 200} -!2 = !{!"function_entry_count", i64 300} -!3 = !{!"function_entry_count", i64 500} -!4 = !{!"function_entry_count", i64 500} - diff --git a/llvm/test/Transforms/Inline/function-count-update.ll b/llvm/test/Transforms/Inline/function-count-update.ll deleted file mode 100644 index 6fed301..0000000 --- a/llvm/test/Transforms/Inline/function-count-update.ll +++ /dev/null @@ -1,51 +0,0 @@ -; RUN: opt < %s -inline -S | FileCheck %s -; RUN: opt < %s -always-inline -S | FileCheck %s - -; This tests that the function count of two callees get correctly updated after -; they have been inlined into two back-to-back callsites in a single basic block -; in the caller. The callees have the alwaysinline attribute and so they get -; inlined both with the regular inliner pass and the always inline pass. In -; both cases, the new count of each callee is the original count minus callsite -; count which is 200 (since the caller's entry count is 400 and the block -; containing the calls have a relative block frequency of 0.5). - -; CHECK: @callee1(i32 %n) #0 !prof [[COUNT1:![0-9]+]] -define i32 @callee1(i32 %n) #0 !prof !1 { - %cond = icmp sle i32 %n, 10 - br i1 %cond, label %cond_true, label %cond_false - -cond_true: - %r1 = add i32 %n, 1 - ret i32 %r1 -cond_false: - %r2 = add i32 %n, 2 - ret i32 %r2 -} - -; CHECK: @callee2(i32 %n) #0 !prof [[COUNT2:![0-9]+]] -define i32 @callee2(i32 %n) #0 !prof !2 { - %r1 = add i32 %n, 1 - ret i32 %r1 -} - -define i32 @caller(i32 %n) !prof !3 { - %cond = icmp sle i32 %n, 100 - br i1 %cond, label %cond_true, label %cond_false - -cond_true: - %i = call i32 @callee1(i32 %n) - %j = call i32 @callee2(i32 %i) - ret i32 %j -cond_false: - ret i32 0 -} - -!llvm.module.flags = !{!0} -; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 800} -; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 1800} -!0 = !{i32 1, !"MaxFunctionCount", i32 1000} -!1 = !{!"function_entry_count", i64 1000} -!2 = !{!"function_entry_count", i64 2000} -!3 = !{!"function_entry_count", i64 400} -attributes #0 = { alwaysinline } - -- 2.7.4