From 63847d04e79549f345c29d41f7d6da24cb18687c Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Thu, 14 Jul 2016 12:18:53 +0000 Subject: [PATCH] code hoisting pass based on GVN This pass hoists duplicated computations in the program. The primary goal of gvn-hoist is to reduce the size of functions before inline heuristics to reduce the total cost of function inlining. Pass written by Sebastian Pop, Aditya Kumar, Xiaoyu Hu, and Brian Rzycki. Important algorithmic contributions by Daniel Berlin under the form of reviews. Differential Revision: http://reviews.llvm.org/D19338 llvm-svn: 275401 --- llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 1 + llvm/include/llvm/Transforms/Scalar.h | 7 + llvm/include/llvm/Transforms/Scalar/GVN.h | 15 +- llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 1 + llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 + llvm/lib/Transforms/Scalar/GVNHoist.cpp | 820 +++++++++++++++++++++++++ llvm/lib/Transforms/Scalar/Scalar.cpp | 5 + llvm/test/Transforms/GVN/hoist-pr20242.ll | 74 +++ llvm/test/Transforms/GVN/hoist-pr22005.ll | 30 + llvm/test/Transforms/GVN/hoist.ll | 691 +++++++++++++++++++++ 12 files changed, 1643 insertions(+), 4 deletions(-) create mode 100644 llvm/lib/Transforms/Scalar/GVNHoist.cpp create mode 100644 llvm/test/Transforms/GVN/hoist-pr20242.ll create mode 100644 llvm/test/Transforms/GVN/hoist-pr22005.ll create mode 100644 llvm/test/Transforms/GVN/hoist.ll diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 34b156e..6f8aae7 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -120,6 +120,7 @@ void initializeEarlyIfConverterPass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); void initializeEfficiencySanitizerPass(PassRegistry&); void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry &); +void initializeGVNHoistLegacyPassPass(PassRegistry &); void initializeExpandISelPseudosPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); void initializeExternalAAWrapperPassPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index a12913e..b2721d0 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -160,6 +160,7 @@ namespace { (void) llvm::createConstantHoistingPass(); (void) llvm::createCodeGenPreparePass(); (void) llvm::createEarlyCSEPass(); + (void) llvm::createGVNHoistPass(); (void) llvm::createMergedLoadStoreMotionPass(); (void) llvm::createGVNPass(); (void) llvm::createMemCpyOptPass(); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 040e95c..167cc94 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -326,6 +326,13 @@ FunctionPass *createEarlyCSEPass(); //===----------------------------------------------------------------------===// // +// GVNHoist - This pass performs a simple and fast GVN pass over the dominator +// tree to hoist common expressions from sibling branches. +// +FunctionPass *createGVNHoistPass(); + +//===----------------------------------------------------------------------===// +// // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads // are hoisted into the header, while stores sink into the footer. // diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h index 9a53ca3..3bb5ec3 100644 --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -58,11 +58,7 @@ public: AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } MemoryDependenceResults &getMemDep() const { return *MD; } -private: - friend class gvn::GVNLegacyPass; - struct Expression; - friend struct DenseMapInfo; /// This class holds the mapping between values and value numbers. It is used /// as an efficient mechanism to determine the expression-wise equivalence of @@ -104,6 +100,10 @@ private: void verifyRemoved(const Value *) const; }; +private: + friend class gvn::GVNLegacyPass; + friend struct DenseMapInfo; + MemoryDependenceResults *MD; DominatorTree *DT; const TargetLibraryInfo *TLI; @@ -228,6 +228,13 @@ private: /// loads are eliminated by the pass. FunctionPass *createGVNPass(bool NoLoads = false); +/// \brief A simple and fast domtree-based GVN pass to hoist common expressions +/// from sibling branches. +struct GVNHoistPass : PassInfoMixin { + /// \brief Run the pass over the function. + PreservedAnalyses run(Function &F, AnalysisManager &AM); +}; + } #endif diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index ffa0864..c5f9a3c 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -133,6 +133,7 @@ FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass()) FUNCTION_PASS("dce", DCEPass()) FUNCTION_PASS("dse", DSEPass()) FUNCTION_PASS("early-cse", EarlyCSEPass()) +FUNCTION_PASS("gvn-hoist", GVNHoistPass()) FUNCTION_PASS("instcombine", InstCombinePass()) FUNCTION_PASS("instsimplify", InstSimplifierPass()) FUNCTION_PASS("invalidate", InvalidateAllAnalysesPass()) diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index e283d26..c6feb13 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -223,6 +223,7 @@ void PassManagerBuilder::populateFunctionPassManager( FPM.add(createCFGSimplificationPass()); FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); + FPM.add(createGVNHoistPass()); FPM.add(createLowerExpectIntrinsicPass()); } diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index ac16203..9f04344 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_library(LLVMScalarOpts Float2Int.cpp GuardWidening.cpp GVN.cpp + GVNHoist.cpp InductiveRangeCheckElimination.cpp IndVarSimplify.cpp JumpThreading.cpp diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp new file mode 100644 index 0000000..c0fd9e2 --- /dev/null +++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -0,0 +1,820 @@ +//===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass hoists expressions from branches to a common dominator. It uses +// GVN (global value numbering) to discover expressions computing the same +// values. The primary goal is to reduce the code size, and in some +// cases reduce critical path (by exposing more ILP). +// Hoisting may affect the performance in some cases. To mitigate that, hoisting +// is disabled in the following cases. +// 1. Scalars across calls. +// 2. geps when corresponding load/store cannot be hoisted. +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Utils/MemorySSA.h" +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "gvn-hoist" + +STATISTIC(NumHoisted, "Number of instructions hoisted"); +STATISTIC(NumRemoved, "Number of instructions removed"); +STATISTIC(NumLoadsHoisted, "Number of loads hoisted"); +STATISTIC(NumLoadsRemoved, "Number of loads removed"); +STATISTIC(NumStoresHoisted, "Number of stores hoisted"); +STATISTIC(NumStoresRemoved, "Number of stores removed"); +STATISTIC(NumCallsHoisted, "Number of calls hoisted"); +STATISTIC(NumCallsRemoved, "Number of calls removed"); + +static cl::opt + MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1), + cl::desc("Max number of instructions to hoist " + "(default unlimited = -1)")); +static cl::opt MaxNumberOfBBSInPath( + "gvn-hoist-max-bbs", cl::Hidden, cl::init(4), + cl::desc("Max number of basic blocks on the path between " + "hoisting locations (default = 4, unlimited = -1)")); + +static int HoistedCtr = 0; + +namespace { + +// Provides a sorting function based on the execution order of two instructions. +struct SortByDFSIn { +private: + DenseMap &DFSNumber; + +public: + SortByDFSIn(DenseMap &D) : DFSNumber(D) {} + + // Returns true when A executes before B. + bool operator()(const Instruction *A, const Instruction *B) const { + assert(A != B); + const BasicBlock *BA = A->getParent(); + const BasicBlock *BB = B->getParent(); + unsigned NA = DFSNumber[BA]; + unsigned NB = DFSNumber[BB]; + if (NA < NB) + return true; + if (NA == NB) { + // Sort them in the order they occur in the same basic block. + BasicBlock::const_iterator AI(A), BI(B); + return std::distance(AI, BI) < 0; + } + return false; + } +}; + +// A map from a VN (value number) to all the instructions with that VN. +typedef DenseMap> VNtoInsns; + +// Records all scalar instructions candidate for code hoisting. +class InsnInfo { + VNtoInsns VNtoScalars; + +public: + // Inserts I and its value number in VNtoScalars. + void insert(Instruction *I, GVN::ValueTable &VN) { + // Scalar instruction. + unsigned V = VN.lookupOrAdd(I); + VNtoScalars[V].push_back(I); + } + + const VNtoInsns &getVNTable() const { return VNtoScalars; } +}; + +// Records all load instructions candidate for code hoisting. +class LoadInfo { + VNtoInsns VNtoLoads; + +public: + // Insert Load and the value number of its memory address in VNtoLoads. + void insert(LoadInst *Load, GVN::ValueTable &VN) { + if (Load->isSimple()) { + unsigned V = VN.lookupOrAdd(Load->getPointerOperand()); + VNtoLoads[V].push_back(Load); + } + } + + const VNtoInsns &getVNTable() const { return VNtoLoads; } +}; + +// Records all store instructions candidate for code hoisting. +class StoreInfo { + VNtoInsns VNtoStores; + +public: + // Insert the Store and a hash number of the store address and the stored + // value in VNtoStores. + void insert(StoreInst *Store, GVN::ValueTable &VN) { + if (!Store->isSimple()) + return; + // Hash the store address and the stored value. + Value *Ptr = Store->getPointerOperand(); + Value *Val = Store->getValueOperand(); + VNtoStores[hash_combine(VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val))] + .push_back(Store); + } + + const VNtoInsns &getVNTable() const { return VNtoStores; } +}; + +// Records all call instructions candidate for code hoisting. +class CallInfo { + VNtoInsns VNtoCallsScalars; + VNtoInsns VNtoCallsLoads; + VNtoInsns VNtoCallsStores; + +public: + // Insert Call and its value numbering in one of the VNtoCalls* containers. + void insert(CallInst *Call, GVN::ValueTable &VN) { + // A call that doesNotAccessMemory is handled as a Scalar, + // onlyReadsMemory will be handled as a Load instruction, + // all other calls will be handled as stores. + unsigned V = VN.lookupOrAdd(Call); + + if (Call->doesNotAccessMemory()) + VNtoCallsScalars[V].push_back(Call); + else if (Call->onlyReadsMemory()) + VNtoCallsLoads[V].push_back(Call); + else + VNtoCallsStores[V].push_back(Call); + } + + const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; } + + const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; } + + const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; } +}; + +typedef DenseMap BBSideEffectsSet; +typedef SmallVector SmallVecInsn; +typedef SmallVectorImpl SmallVecImplInsn; + +// This pass hoists common computations across branches sharing common +// dominator. The primary goal is to reduce the code size, and in some +// cases reduce critical path (by exposing more ILP). +class GVNHoist { +public: + GVN::ValueTable VN; + DominatorTree *DT; + AliasAnalysis *AA; + MemoryDependenceResults *MD; + const bool OptForMinSize; + DenseMap DFSNumber; + BBSideEffectsSet BBSideEffects; + MemorySSA *MSSA; + enum InsKind { Unknown, Scalar, Load, Store }; + + GVNHoist(DominatorTree *Dt, AliasAnalysis *Aa, MemoryDependenceResults *Md, + bool OptForMinSize) + : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize) {} + + // Return true when there are exception handling in BB. + bool hasEH(const BasicBlock *BB) { + auto It = BBSideEffects.find(BB); + if (It != BBSideEffects.end()) + return It->second; + + if (BB->isEHPad() || BB->hasAddressTaken()) { + BBSideEffects[BB] = true; + return true; + } + + if (BB->getTerminator()->mayThrow()) { + BBSideEffects[BB] = true; + return true; + } + + BBSideEffects[BB] = false; + return false; + } + + // Return true when all paths from A to the end of the function pass through + // either B or C. + bool hoistingFromAllPaths(const BasicBlock *A, const BasicBlock *B, + const BasicBlock *C) { + // We fully copy the WL in order to be able to remove items from it. + SmallPtrSet WL; + WL.insert(B); + WL.insert(C); + + for (auto It = df_begin(A), E = df_end(A); It != E;) { + // There exists a path from A to the exit of the function if we are still + // iterating in DF traversal and we removed all instructions from the work + // list. + if (WL.empty()) + return false; + + const BasicBlock *BB = *It; + if (WL.erase(BB)) { + // Stop DFS traversal when BB is in the work list. + It.skipChildren(); + continue; + } + + // Check for end of function, calls that do not return, etc. + if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator())) + return false; + + // Increment DFS traversal when not skipping children. + ++It; + } + + return true; + } + + /* Return true when I1 appears before I2 in the instructions of BB. */ + bool firstInBB(BasicBlock *BB, const Instruction *I1, const Instruction *I2) { + for (Instruction &I : *BB) { + if (&I == I1) + return true; + if (&I == I2) + return false; + } + + llvm_unreachable("I1 and I2 not found in BB"); + } + // Return true when there are users of Def in BB. + bool hasMemoryUseOnPath(MemoryAccess *Def, const BasicBlock *BB, + const Instruction *OldPt) { + Value::user_iterator UI = Def->user_begin(); + Value::user_iterator UE = Def->user_end(); + const BasicBlock *DefBB = Def->getBlock(); + const BasicBlock *OldBB = OldPt->getParent(); + + for (; UI != UE; ++UI) + if (MemoryUse *U = dyn_cast(*UI)) { + BasicBlock *UBB = U->getBlock(); + // Only analyze uses in BB. + if (BB != UBB) + continue; + + // A use in the same block as the Def is on the path. + if (UBB == DefBB) { + assert(MSSA->locallyDominates(Def, U) && "def not dominating use"); + return true; + } + + if (UBB != OldBB) + return true; + + // It is only harmful to hoist when the use is before OldPt. + if (firstInBB(UBB, U->getMemoryInst(), OldPt)) + return true; + } + + return false; + } + + // Return true when there are exception handling or loads of memory Def + // between OldPt and NewPt. + + // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and + // return true when the counter NBBsOnAllPaths reaces 0, except when it is + // initialized to -1 which is unlimited. + bool hasEHOrLoadsOnPath(const Instruction *NewPt, const Instruction *OldPt, + MemoryAccess *Def, int &NBBsOnAllPaths) { + const BasicBlock *NewBB = NewPt->getParent(); + const BasicBlock *OldBB = OldPt->getParent(); + assert(DT->dominates(NewBB, OldBB) && "invalid path"); + assert(DT->dominates(Def->getBlock(), NewBB) && + "def does not dominate new hoisting point"); + + // Walk all basic blocks reachable in depth-first iteration on the inverse + // CFG from OldBB to NewBB. These blocks are all the blocks that may be + // executed between the execution of NewBB and OldBB. Hoisting an expression + // from OldBB into NewBB has to be safe on all execution paths. + for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) { + if (*I == NewBB) { + // Stop traversal when reaching HoistPt. + I.skipChildren(); + continue; + } + + // Impossible to hoist with exceptions on the path. + if (hasEH(*I)) + return true; + + // Check that we do not move a store past loads. + if (hasMemoryUseOnPath(Def, *I, OldPt)) + return true; + + // Stop walk once the limit is reached. + if (NBBsOnAllPaths == 0) + return true; + + // -1 is unlimited number of blocks on all paths. + if (NBBsOnAllPaths != -1) + --NBBsOnAllPaths; + + ++I; + } + + return false; + } + + // Return true when there are exception handling between HoistPt and BB. + // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and + // return true when the counter NBBsOnAllPaths reaches 0, except when it is + // initialized to -1 which is unlimited. + bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *BB, + int &NBBsOnAllPaths) { + assert(DT->dominates(HoistPt, BB) && "Invalid path"); + + // Walk all basic blocks reachable in depth-first iteration on + // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the + // blocks that may be executed between the execution of NewHoistPt and + // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe + // on all execution paths. + for (auto I = idf_begin(BB), E = idf_end(BB); I != E;) { + if (*I == HoistPt) { + // Stop traversal when reaching NewHoistPt. + I.skipChildren(); + continue; + } + + // Impossible to hoist with exceptions on the path. + if (hasEH(*I)) + return true; + + // Stop walk once the limit is reached. + if (NBBsOnAllPaths == 0) + return true; + + // -1 is unlimited number of blocks on all paths. + if (NBBsOnAllPaths != -1) + --NBBsOnAllPaths; + + ++I; + } + + return false; + } + + // Return true when it is safe to hoist a memory load or store U from OldPt + // to NewPt. + bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt, + MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) { + + // In place hoisting is safe. + if (NewPt == OldPt) + return true; + + const BasicBlock *NewBB = NewPt->getParent(); + const BasicBlock *OldBB = OldPt->getParent(); + const BasicBlock *UBB = U->getBlock(); + + // Check for dependences on the Memory SSA. + MemoryAccess *D = U->getDefiningAccess(); + BasicBlock *DBB = D->getBlock(); + if (DT->properlyDominates(NewBB, DBB)) + // Cannot move the load or store to NewBB above its definition in DBB. + return false; + + if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D)) + if (MemoryUseOrDef *UD = dyn_cast(D)) + if (firstInBB(DBB, NewPt, UD->getMemoryInst())) + // Cannot move the load or store to NewPt above its definition in D. + return false; + + // Check for unsafe hoistings due to side effects. + if (K == InsKind::Store) { + if (hasEHOrLoadsOnPath(NewPt, OldPt, D, NBBsOnAllPaths)) + return false; + } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) + return false; + + if (UBB == NewBB) { + if (DT->properlyDominates(DBB, NewBB)) + return true; + assert(UBB == DBB); + assert(MSSA->locallyDominates(D, U)); + } + + // No side effects: it is safe to hoist. + return true; + } + + // Return true when it is safe to hoist scalar instructions from BB1 and BB2 + // to HoistBB. + bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB1, + const BasicBlock *BB2, int &NBBsOnAllPaths) { + // Check that the hoisted expression is needed on all paths. When HoistBB + // already contains an instruction to be hoisted, the expression is needed + // on all paths. Enable scalar hoisting at -Oz as it is safe to hoist + // scalars to a place where they are partially needed. + if (!OptForMinSize && BB1 != HoistBB && + !hoistingFromAllPaths(HoistBB, BB1, BB2)) + return false; + + if (hasEHOnPath(HoistBB, BB1, NBBsOnAllPaths) || + hasEHOnPath(HoistBB, BB2, NBBsOnAllPaths)) + return false; + + // Safe to hoist scalars from BB1 and BB2 to HoistBB. + return true; + } + + // Each element of a hoisting list contains the basic block where to hoist and + // a list of instructions to be hoisted. + typedef std::pair HoistingPointInfo; + typedef SmallVector HoistingPointList; + + // Partition InstructionsToHoist into a set of candidates which can share a + // common hoisting point. The partitions are collected in HPL. IsScalar is + // true when the instructions in InstructionsToHoist are scalars. IsLoad is + // true when the InstructionsToHoist are loads, false when they are stores. + void partitionCandidates(SmallVecImplInsn &InstructionsToHoist, + HoistingPointList &HPL, InsKind K) { + // No need to sort for two instructions. + if (InstructionsToHoist.size() > 2) { + SortByDFSIn Pred(DFSNumber); + std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred); + } + + int NBBsOnAllPaths = MaxNumberOfBBSInPath; + + SmallVecImplInsn::iterator II = InstructionsToHoist.begin(); + SmallVecImplInsn::iterator Start = II; + Instruction *HoistPt = *II; + BasicBlock *HoistBB = HoistPt->getParent(); + MemoryUseOrDef *UD; + if (K != InsKind::Scalar) + UD = cast(MSSA->getMemoryAccess(HoistPt)); + + for (++II; II != InstructionsToHoist.end(); ++II) { + Instruction *Insn = *II; + BasicBlock *BB = Insn->getParent(); + BasicBlock *NewHoistBB; + Instruction *NewHoistPt; + + if (BB == HoistBB) { + NewHoistBB = HoistBB; + NewHoistPt = firstInBB(BB, Insn, HoistPt) ? Insn : HoistPt; + } else { + NewHoistBB = DT->findNearestCommonDominator(HoistBB, BB); + if (NewHoistBB == BB) + NewHoistPt = Insn; + else if (NewHoistBB == HoistBB) + NewHoistPt = HoistPt; + else + NewHoistPt = NewHoistBB->getTerminator(); + } + + if (K == InsKind::Scalar) { + if (safeToHoistScalar(NewHoistBB, HoistBB, BB, NBBsOnAllPaths)) { + // Extend HoistPt to NewHoistPt. + HoistPt = NewHoistPt; + HoistBB = NewHoistBB; + continue; + } + } else { + // When NewBB already contains an instruction to be hoisted, the + // expression is needed on all paths. + // Check that the hoisted expression is needed on all paths: it is + // unsafe to hoist loads to a place where there may be a path not + // loading from the same address: for instance there may be a branch on + // which the address of the load may not be initialized. + if ((HoistBB == NewHoistBB || BB == NewHoistBB || + hoistingFromAllPaths(NewHoistBB, HoistBB, BB)) && + // Also check that it is safe to move the load or store from HoistPt + // to NewHoistPt, and from Insn to NewHoistPt. + safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NBBsOnAllPaths) && + safeToHoistLdSt(NewHoistPt, Insn, + cast(MSSA->getMemoryAccess(Insn)), + K, NBBsOnAllPaths)) { + // Extend HoistPt to NewHoistPt. + HoistPt = NewHoistPt; + HoistBB = NewHoistBB; + continue; + } + } + + // At this point it is not safe to extend the current hoisting to + // NewHoistPt: save the hoisting list so far. + if (std::distance(Start, II) > 1) + HPL.push_back(std::make_pair(HoistBB, SmallVecInsn(Start, II))); + + // Start over from BB. + Start = II; + if (K != InsKind::Scalar) + UD = cast(MSSA->getMemoryAccess(*Start)); + HoistPt = Insn; + HoistBB = BB; + NBBsOnAllPaths = MaxNumberOfBBSInPath; + } + + // Save the last partition. + if (std::distance(Start, II) > 1) + HPL.push_back(std::make_pair(HoistBB, SmallVecInsn(Start, II))); + } + + // Initialize HPL from Map. + void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL, + InsKind K) { + for (VNtoInsns::const_iterator It = Map.begin(); It != Map.end(); ++It) { + if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold) + return; + + const SmallVecInsn &V = It->second; + if (V.size() < 2) + continue; + + // Compute the insertion point and the list of expressions to be hoisted. + SmallVecInsn InstructionsToHoist; + for (auto I : V) + if (!hasEH(I->getParent())) + InstructionsToHoist.push_back(I); + + if (InstructionsToHoist.size()) + partitionCandidates(InstructionsToHoist, HPL, K); + } + } + + // Return true when all operands of Instr are available at insertion point + // HoistPt. When limiting the number of hoisted expressions, one could hoist + // a load without hoisting its access function. So before hoisting any + // expression, make sure that all its operands are available at insert point. + bool allOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + const Value *Op = I->getOperand(i); + const Instruction *Inst = dyn_cast(Op); + if (Inst && !DT->dominates(Inst->getParent(), HoistPt)) + return false; + } + + return true; + } + + Instruction *firstOfTwo(Instruction *I, Instruction *J) const { + for (Instruction &I1 : *I->getParent()) + if (&I1 == I || &I1 == J) + return &I1; + llvm_unreachable("Both I and J must be from same BB"); + } + + // Replace the use of From with To in Insn. + void replaceUseWith(Instruction *Insn, Value *From, Value *To) const { + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE;) { + Use &U = *UI++; + if (U.getUser() == Insn) { + U.set(To); + return; + } + } + llvm_unreachable("should replace exactly once"); + } + + bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt) const { + // Check whether the GEP of a ld/st can be synthesized at HoistPt. + Instruction *Gep = nullptr; + Instruction *Val = nullptr; + if (LoadInst *Ld = dyn_cast(Repl)) + Gep = dyn_cast(Ld->getPointerOperand()); + if (StoreInst *St = dyn_cast(Repl)) { + Gep = dyn_cast(St->getPointerOperand()); + Val = dyn_cast(St->getValueOperand()); + } + + if (!Gep || !isa(Gep)) + return false; + + // Check whether we can compute the Gep at HoistPt. + if (!allOperandsAvailable(Gep, HoistPt)) + return false; + + // Also check that the stored value is available. + if (Val && !allOperandsAvailable(Val, HoistPt)) + return false; + + // Copy the gep before moving the ld/st. + Instruction *ClonedGep = Gep->clone(); + ClonedGep->insertBefore(HoistPt->getTerminator()); + replaceUseWith(Repl, Gep, ClonedGep); + + // Also copy Val when it is a gep: geps are not hoisted by default. + if (Val && isa(Val)) { + Instruction *ClonedVal = Val->clone(); + ClonedVal->insertBefore(HoistPt->getTerminator()); + replaceUseWith(Repl, Val, ClonedVal); + } + + return true; + } + + std::pair hoist(HoistingPointList &HPL) { + unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0; + for (const HoistingPointInfo &HP : HPL) { + // Find out whether we already have one of the instructions in HoistPt, + // in which case we do not have to move it. + BasicBlock *HoistPt = HP.first; + const SmallVecInsn &InstructionsToHoist = HP.second; + Instruction *Repl = nullptr; + for (Instruction *I : InstructionsToHoist) + if (I->getParent() == HoistPt) { + // If there are two instructions in HoistPt to be hoisted in place: + // update Repl to be the first one, such that we can rename the uses + // of the second based on the first. + Repl = !Repl ? I : firstOfTwo(Repl, I); + } + + if (Repl) { + // Repl is already in HoistPt: it remains in place. + assert(allOperandsAvailable(Repl, HoistPt) && + "instruction depends on operands that are not available"); + } else { + // When we do not find Repl in HoistPt, select the first in the list + // and move it to HoistPt. + Repl = InstructionsToHoist.front(); + + // We can move Repl in HoistPt only when all operands are available. + // The order in which hoistings are done may influence the availability + // of operands. + if (!allOperandsAvailable(Repl, HoistPt) && + !makeOperandsAvailable(Repl, HoistPt)) + continue; + Repl->moveBefore(HoistPt->getTerminator()); + } + + if (isa(Repl)) + ++NL; + else if (isa(Repl)) + ++NS; + else if (isa(Repl)) + ++NC; + else // Scalar + ++NI; + + // Remove and rename all other instructions. + for (Instruction *I : InstructionsToHoist) + if (I != Repl) { + ++NR; + if (isa(Repl)) + ++NumLoadsRemoved; + else if (isa(Repl)) + ++NumStoresRemoved; + else if (isa(Repl)) + ++NumCallsRemoved; + I->replaceAllUsesWith(Repl); + I->eraseFromParent(); + } + } + + NumHoisted += NL + NS + NC + NI; + NumRemoved += NR; + NumLoadsHoisted += NL; + NumStoresHoisted += NS; + NumCallsHoisted += NC; + return {NI, NL + NC + NS}; + } + + // Hoist all expressions. Returns Number of scalars hoisted + // and number of non-scalars hoisted. + std::pair hoistExpressions(Function &F) { + InsnInfo II; + LoadInfo LI; + StoreInfo SI; + CallInfo CI; + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { + for (Instruction &I1 : *BB) { + if (LoadInst *Load = dyn_cast(&I1)) + LI.insert(Load, VN); + else if (StoreInst *Store = dyn_cast(&I1)) + SI.insert(Store, VN); + else if (CallInst *Call = dyn_cast(&I1)) { + if (IntrinsicInst *Intr = dyn_cast(Call)) { + if (isa(Intr) || + Intr->getIntrinsicID() == Intrinsic::assume) + continue; + } + if (Call->mayHaveSideEffects()) { + if (!OptForMinSize) + break; + // We may continue hoisting across calls which write to memory. + if (Call->mayThrow()) + break; + } + CI.insert(Call, VN); + } else if (OptForMinSize || !isa(&I1)) + // Do not hoist scalars past calls that may write to memory because + // that could result in spills later. geps are handled separately. + // TODO: We can relax this for targets like AArch64 as they have more + // registers than X86. + II.insert(&I1, VN); + } + } + + HoistingPointList HPL; + computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar); + computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load); + computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store); + computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar); + computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load); + computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store); + return hoist(HPL); + } + + bool run(Function &F) { + VN.setDomTree(DT); + VN.setAliasAnalysis(AA); + VN.setMemDep(MD); + bool Res = false; + + unsigned I = 0; + for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) + DFSNumber.insert(std::make_pair(BB, ++I)); + + // FIXME: use lazy evaluation of VN to avoid the fix-point computation. + while (1) { + // FIXME: only compute MemorySSA once. We need to update the analysis in + // the same time as transforming the code. + MemorySSA M(F, AA, DT); + MSSA = &M; + + auto HoistStat = hoistExpressions(F); + if (HoistStat.first + HoistStat.second == 0) { + return Res; + } + if (HoistStat.second > 0) { + // To address a limitation of the current GVN, we need to rerun the + // hoisting after we hoisted loads in order to be able to hoist all + // scalars dependent on the hoisted loads. Same for stores. + VN.clear(); + } + Res = true; + } + + return Res; + } +}; + +class GVNHoistLegacyPass : public FunctionPass { +public: + static char ID; + + GVNHoistLegacyPass() : FunctionPass(ID) { + initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto &DT = getAnalysis().getDomTree(); + auto &AA = getAnalysis().getAAResults(); + auto &MD = getAnalysis().getMemDep(); + + GVNHoist G(&DT, &AA, &MD, F.optForMinSize()); + return G.run(F); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + } +}; +} // namespace + +PreservedAnalyses GVNHoistPass::run(Function &F, + AnalysisManager &AM) { + DominatorTree &DT = AM.getResult(F); + AliasAnalysis &AA = AM.getResult(F); + MemoryDependenceResults &MD = AM.getResult(F); + + GVNHoist G(&DT, &AA, &MD, F.optForMinSize()); + if (!G.run(F)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve(); + return PA; +} + +char GVNHoistLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist", + "Early GVN Hoisting of Expressions", false, false) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist", + "Early GVN Hoisting of Expressions", false, false) + +FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); } diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 13610fa..e57fda3 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -44,6 +44,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeGuardWideningLegacyPassPass(Registry); initializeGVNLegacyPassPass(Registry); initializeEarlyCSELegacyPassPass(Registry); + initializeGVNHoistLegacyPassPass(Registry); initializeFlattenCFGPassPass(Registry); initializeInductiveRangeCheckEliminationPass(Registry); initializeIndVarSimplifyLegacyPassPass(Registry); @@ -236,6 +237,10 @@ void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createEarlyCSEPass()); } +void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createGVNHoistPass()); +} + void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createTypeBasedAAWrapperPass()); } diff --git a/llvm/test/Transforms/GVN/hoist-pr20242.ll b/llvm/test/Transforms/GVN/hoist-pr20242.ll new file mode 100644 index 0000000..b91f18a --- /dev/null +++ b/llvm/test/Transforms/GVN/hoist-pr20242.ll @@ -0,0 +1,74 @@ +; RUN: opt -gvn-hoist -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Check that all "or" expressions are hoisted. +; CHECK-LABEL: @encode +; CHECK: or i32 +; CHECK-NOT: or i32 + +define i8* @encode(i8* %p, i32 %v) { +entry: + %p.addr = alloca i8*, align 8 + %v.addr = alloca i32, align 4 + store i8* %p, i8** %p.addr, align 8 + store i32 %v, i32* %v.addr, align 4 + %0 = load i32, i32* %v.addr, align 4 + %cmp = icmp ult i32 %0, 23 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %1 = load i32, i32* %v.addr, align 4 + %or = or i32 %1, 128 + %conv = trunc i32 %or to i8 + %2 = load i8*, i8** %p.addr, align 8 + %incdec.ptr = getelementptr inbounds i8, i8* %2, i32 1 + store i8* %incdec.ptr, i8** %p.addr, align 8 + store i8 %conv, i8* %2, align 1 + br label %if.end15 + +if.else: ; preds = %entry + %3 = load i32, i32* %v.addr, align 4 + %cmp1 = icmp ult i32 %3, 42 + br i1 %cmp1, label %if.then3, label %if.else9 + +if.then3: ; preds = %if.else + %4 = load i32, i32* %v.addr, align 4 + %or4 = or i32 %4, 128 + %conv5 = trunc i32 %or4 to i8 + %5 = load i8*, i8** %p.addr, align 8 + %incdec.ptr6 = getelementptr inbounds i8, i8* %5, i32 1 + store i8* %incdec.ptr6, i8** %p.addr, align 8 + store i8 %conv5, i8* %5, align 1 + %6 = load i32, i32* %v.addr, align 4 + %conv7 = trunc i32 %6 to i8 + %7 = load i8*, i8** %p.addr, align 8 + %incdec.ptr8 = getelementptr inbounds i8, i8* %7, i32 1 + store i8* %incdec.ptr8, i8** %p.addr, align 8 + store i8 %conv7, i8* %7, align 1 + br label %if.end + +if.else9: ; preds = %if.else + %8 = load i32, i32* %v.addr, align 4 + %or10 = or i32 %8, 128 + %conv11 = trunc i32 %or10 to i8 + %9 = load i8*, i8** %p.addr, align 8 + %incdec.ptr12 = getelementptr inbounds i8, i8* %9, i32 1 + store i8* %incdec.ptr12, i8** %p.addr, align 8 + store i8 %conv11, i8* %9, align 1 + %10 = load i32, i32* %v.addr, align 4 + %shr = lshr i32 %10, 7 + %conv13 = trunc i32 %shr to i8 + %11 = load i8*, i8** %p.addr, align 8 + %incdec.ptr14 = getelementptr inbounds i8, i8* %11, i32 1 + store i8* %incdec.ptr14, i8** %p.addr, align 8 + store i8 %conv13, i8* %11, align 1 + br label %if.end + +if.end: ; preds = %if.else9, %if.then3 + br label %if.end15 + +if.end15: ; preds = %if.end, %if.then + %12 = load i8*, i8** %p.addr, align 8 + ret i8* %12 +} diff --git a/llvm/test/Transforms/GVN/hoist-pr22005.ll b/llvm/test/Transforms/GVN/hoist-pr22005.ll new file mode 100644 index 0000000..9299f4f --- /dev/null +++ b/llvm/test/Transforms/GVN/hoist-pr22005.ll @@ -0,0 +1,30 @@ +; RUN: opt -gvn-hoist -S < %s | FileCheck %s +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Check that all "sub" expressions are hoisted. +; CHECK-LABEL: @fun +; CHECK: sub i64 +; CHECK-NOT: sub i64 + +define i64 @fun(i8* %out, i8* %end) { + %1 = icmp ult i8* %out, %end + br i1 %1, label %2, label %6 + +;