From 8716b58583cf5e753f2dc2cfb214cdf24a839c43 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 13 Nov 2014 22:54:59 +0000 Subject: [PATCH] Revert "[GVN] Perform Scalar PRE on gep indices that feed loads before doing Load PRE." This reverts commit r221924. It appears the commit was a bit premature and is causing bot failures that need further investigation. llvm-svn: 221939 --- llvm/lib/Transforms/Scalar/GVN.cpp | 332 +++++++++++++++---------------- llvm/test/Transforms/GVN/pre-gep-load.ll | 49 ----- 2 files changed, 166 insertions(+), 215 deletions(-) delete mode 100644 llvm/test/Transforms/GVN/pre-gep-load.ll diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 5279216..7dba4e2 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -20,7 +20,6 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -710,7 +709,6 @@ namespace { void dump(DenseMap &d); bool iterateOnFunction(Function &F); bool performPRE(Function &F); - bool performScalarPRE(Instruction *I); Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; @@ -1731,14 +1729,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return false; } - // If this load follows a GEP, see if we can PRE the indices before analyzing. - if (GetElementPtrInst *GEP = dyn_cast(LI->getOperand(0))) { - for(GetElementPtrInst::op_iterator OI = GEP->idx_begin(), - OE = GEP->idx_end(); OI != OE; ++OI) - if (Instruction *I = dyn_cast(OI->get())) - performScalarPRE(I); - } - // Step 2: Analyze the availability of the load AvailValInBlkVect ValuesPerBlock; UnavailBlkVect UnavailableBlocks; @@ -2441,180 +2431,175 @@ bool GVN::processBlock(BasicBlock *BB) { return ChangedFunction; } -bool GVN::performScalarPRE(Instruction *CurInst) { +/// performPRE - Perform a purely local form of PRE that looks for diamond +/// control flow patterns and attempts to perform simple PRE at the join point. +bool GVN::performPRE(Function &F) { + bool Changed = false; SmallVector, 8> predMap; + for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) { + // Nothing to PRE in the entry block. + if (CurrentBlock == &F.getEntryBlock()) continue; - if (isa(CurInst) || - isa(CurInst) || isa(CurInst) || - CurInst->getType()->isVoidTy() || - CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || - isa(CurInst)) - return false; - - // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from - // sinking the compare again, and it would force the code generator to - // move the i1 from processor flags or predicate registers into a general - // purpose register. - if (isa(CurInst)) - return false; - - // We don't currently value number ANY inline asm calls. - if (CallInst *CallI = dyn_cast(CurInst)) - if (CallI->isInlineAsm()) - return false; + // Don't perform PRE on a landing pad. + if (CurrentBlock->isLandingPad()) continue; - uint32_t ValNo = VN.lookup(CurInst); - - // Look for the predecessors for PRE opportunities. We're - // only trying to solve the basic diamond case, where - // a value is computed in the successor and one predecessor, - // but not the other. We also explicitly disallow cases - // where the successor is its own predecessor, because they're - // more complicated to get right. - unsigned NumWith = 0; - unsigned NumWithout = 0; - BasicBlock *PREPred = nullptr; - BasicBlock *CurrentBlock = CurInst->getParent(); - predMap.clear(); - - for (pred_iterator PI = pred_begin(CurrentBlock), - PE = pred_end(CurrentBlock); PI != PE; ++PI) { - BasicBlock *P = *PI; - // We're not interested in PRE where the block is its - // own predecessor, or in blocks with predecessors - // that are not reachable. - if (P == CurrentBlock) { - NumWithout = 2; - break; - } else if (!DT->isReachableFromEntry(P)) { - NumWithout = 2; - break; - } + for (BasicBlock::iterator BI = CurrentBlock->begin(), + BE = CurrentBlock->end(); BI != BE; ) { + Instruction *CurInst = BI++; - Value* predV = findLeader(P, ValNo); - if (!predV) { - predMap.push_back(std::make_pair(static_cast(nullptr), P)); - PREPred = P; - ++NumWithout; - } else if (predV == CurInst) { - /* CurInst dominates this predecessor. */ - NumWithout = 2; - break; - } else { - predMap.push_back(std::make_pair(predV, P)); - ++NumWith; - } - } + if (isa(CurInst) || + isa(CurInst) || isa(CurInst) || + CurInst->getType()->isVoidTy() || + CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || + isa(CurInst)) + continue; - // Don't do PRE when it might increase code size, i.e. when - // we would need to insert instructions in more than one pred. - if (NumWithout != 1 || NumWith == 0) - return false; + // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from + // sinking the compare again, and it would force the code generator to + // move the i1 from processor flags or predicate registers into a general + // purpose register. + if (isa(CurInst)) + continue; - // Don't do PRE across indirect branch. - if (isa(PREPred->getTerminator())) - return false; + // We don't currently value number ANY inline asm calls. + if (CallInst *CallI = dyn_cast(CurInst)) + if (CallI->isInlineAsm()) + continue; - // We can't do PRE safely on a critical edge, so instead we schedule - // the edge to be split and perform the PRE the next time we iterate - // on the function. - unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); - if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { - toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); - return false; - } + uint32_t ValNo = VN.lookup(CurInst); + + // Look for the predecessors for PRE opportunities. We're + // only trying to solve the basic diamond case, where + // a value is computed in the successor and one predecessor, + // but not the other. We also explicitly disallow cases + // where the successor is its own predecessor, because they're + // more complicated to get right. + unsigned NumWith = 0; + unsigned NumWithout = 0; + BasicBlock *PREPred = nullptr; + predMap.clear(); + + for (pred_iterator PI = pred_begin(CurrentBlock), + PE = pred_end(CurrentBlock); PI != PE; ++PI) { + BasicBlock *P = *PI; + // We're not interested in PRE where the block is its + // own predecessor, or in blocks with predecessors + // that are not reachable. + if (P == CurrentBlock) { + NumWithout = 2; + break; + } else if (!DT->isReachableFromEntry(P)) { + NumWithout = 2; + break; + } - // Instantiate the expression in the predecessor that lacked it. - // Because we are going top-down through the block, all value numbers - // will be available in the predecessor by the time we need them. Any - // that weren't originally present will have been instantiated earlier - // in this loop. - Instruction *PREInstr = CurInst->clone(); - bool success = true; - for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { - Value *Op = PREInstr->getOperand(i); - if (isa(Op) || isa(Op) || isa(Op)) - continue; + Value* predV = findLeader(P, ValNo); + if (!predV) { + predMap.push_back(std::make_pair(static_cast(nullptr), P)); + PREPred = P; + ++NumWithout; + } else if (predV == CurInst) { + /* CurInst dominates this predecessor. */ + NumWithout = 2; + break; + } else { + predMap.push_back(std::make_pair(predV, P)); + ++NumWith; + } + } - if (Value *V = findLeader(PREPred, VN.lookup(Op))) { - PREInstr->setOperand(i, V); - } else { - success = false; - break; - } - } + // Don't do PRE when it might increase code size, i.e. when + // we would need to insert instructions in more than one pred. + if (NumWithout != 1 || NumWith == 0) + continue; - // Fail out if we encounter an operand that is not available in - // the PRE predecessor. This is typically because of loads which - // are not value numbered precisely. - if (!success) { - DEBUG(verifyRemoved(PREInstr)); - delete PREInstr; - return false; - } + // Don't do PRE across indirect branch. + if (isa(PREPred->getTerminator())) + continue; - PREInstr->insertBefore(PREPred->getTerminator()); - PREInstr->setName(CurInst->getName() + ".pre"); - PREInstr->setDebugLoc(CurInst->getDebugLoc()); - VN.add(PREInstr, ValNo); - ++NumGVNPRE; + // We can't do PRE safely on a critical edge, so instead we schedule + // the edge to be split and perform the PRE the next time we iterate + // on the function. + unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); + if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { + toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); + continue; + } - // Update the availability map to include the new instruction. - addToLeaderTable(ValNo, PREInstr, PREPred); + // Instantiate the expression in the predecessor that lacked it. + // Because we are going top-down through the block, all value numbers + // will be available in the predecessor by the time we need them. Any + // that weren't originally present will have been instantiated earlier + // in this loop. + Instruction *PREInstr = CurInst->clone(); + bool success = true; + for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { + Value *Op = PREInstr->getOperand(i); + if (isa(Op) || isa(Op) || isa(Op)) + continue; - // Create a PHI to make the value available in this block. - PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(), - CurInst->getName() + ".pre-phi", - CurrentBlock->begin()); - for (unsigned i = 0, e = predMap.size(); i != e; ++i) { - if (Value *V = predMap[i].first) - Phi->addIncoming(V, predMap[i].second); - else - Phi->addIncoming(PREInstr, PREPred); - } - - VN.add(Phi, ValNo); - addToLeaderTable(ValNo, Phi, CurrentBlock); - Phi->setDebugLoc(CurInst->getDebugLoc()); - CurInst->replaceAllUsesWith(Phi); - if (Phi->getType()->getScalarType()->isPointerTy()) { - // Because we have added a PHI-use of the pointer value, it has now - // "escaped" from alias analysis' perspective. We need to inform - // AA of this. - for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; - ++ii) { - unsigned jj = PHINode::getOperandNumForIncomingValue(ii); - VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); - } + if (Value *V = findLeader(PREPred, VN.lookup(Op))) { + PREInstr->setOperand(i, V); + } else { + success = false; + break; + } + } - if (MD) - MD->invalidateCachedPointerInfo(Phi); - } - VN.erase(CurInst); - removeFromLeaderTable(ValNo, CurInst, CurrentBlock); + // Fail out if we encounter an operand that is not available in + // the PRE predecessor. This is typically because of loads which + // are not value numbered precisely. + if (!success) { + DEBUG(verifyRemoved(PREInstr)); + delete PREInstr; + continue; + } - DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); - if (MD) MD->removeInstruction(CurInst); - DEBUG(verifyRemoved(CurInst)); - CurInst->eraseFromParent(); - return true; -} + PREInstr->insertBefore(PREPred->getTerminator()); + PREInstr->setName(CurInst->getName() + ".pre"); + PREInstr->setDebugLoc(CurInst->getDebugLoc()); + VN.add(PREInstr, ValNo); + ++NumGVNPRE; + + // Update the availability map to include the new instruction. + addToLeaderTable(ValNo, PREInstr, PREPred); + + // Create a PHI to make the value available in this block. + PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(), + CurInst->getName() + ".pre-phi", + CurrentBlock->begin()); + for (unsigned i = 0, e = predMap.size(); i != e; ++i) { + if (Value *V = predMap[i].first) + Phi->addIncoming(V, predMap[i].second); + else + Phi->addIncoming(PREInstr, PREPred); + } -/// performPRE - Perform a purely local form of PRE that looks for diamond -/// control flow patterns and attempts to perform simple PRE at the join point. -bool GVN::performPRE(Function &F) { - bool Changed = false; - for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) { - // Nothing to PRE in the entry block. - if (CurrentBlock == &F.getEntryBlock()) continue; + VN.add(Phi, ValNo); + addToLeaderTable(ValNo, Phi, CurrentBlock); + Phi->setDebugLoc(CurInst->getDebugLoc()); + CurInst->replaceAllUsesWith(Phi); + if (Phi->getType()->getScalarType()->isPointerTy()) { + // Because we have added a PHI-use of the pointer value, it has now + // "escaped" from alias analysis' perspective. We need to inform + // AA of this. + for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; + ++ii) { + unsigned jj = PHINode::getOperandNumForIncomingValue(ii); + VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); + } - // Don't perform PRE on a landing pad. - if (CurrentBlock->isLandingPad()) continue; + if (MD) + MD->invalidateCachedPointerInfo(Phi); + } + VN.erase(CurInst); + removeFromLeaderTable(ValNo, CurInst, CurrentBlock); - for (BasicBlock::iterator BI = CurrentBlock->begin(), - BE = CurrentBlock->end(); BI != BE; ) { - Instruction *CurInst = BI++; - Changed = performScalarPRE(CurInst); + DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); + if (MD) MD->removeInstruction(CurInst); + DEBUG(verifyRemoved(CurInst)); + CurInst->eraseFromParent(); + Changed = true; } } @@ -2652,11 +2637,26 @@ bool GVN::iterateOnFunction(Function &F) { // Top-down walk of the dominator tree bool Changed = false; +#if 0 // Needed for value numbering with phi construction to work. ReversePostOrderTraversal RPOT(&F); for (ReversePostOrderTraversal::rpo_iterator RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) Changed |= processBlock(*RI); +#else + // Save the blocks this function have before transformation begins. GVN may + // split critical edge, and hence may invalidate the RPO/DT iterator. + // + std::vector BBVect; + BBVect.reserve(256); + for (DomTreeNode *X : depth_first(DT->getRootNode())) + BBVect.push_back(X->getBlock()); + + for (std::vector::iterator I = BBVect.begin(), E = BBVect.end(); + I != E; I++) + Changed |= processBlock(*I); +#endif + return Changed; } diff --git a/llvm/test/Transforms/GVN/pre-gep-load.ll b/llvm/test/Transforms/GVN/pre-gep-load.ll deleted file mode 100644 index 3ee3a37..0000000 --- a/llvm/test/Transforms/GVN/pre-gep-load.ll +++ /dev/null @@ -1,49 +0,0 @@ -; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s -target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" -target triple = "aarch64--linux-gnu" - -define double @foo(i32 %stat, i32 %i, double** %p) { -; CHECK-LABEL: @foo( -entry: - switch i32 %stat, label %sw.default [ - i32 0, label %sw.bb - i32 1, label %sw.bb - i32 2, label %sw.bb2 - ] - -sw.bb: ; preds = %entry, %entry - %idxprom = sext i32 %i to i64 - %arrayidx = getelementptr inbounds double** %p, i64 0 - %0 = load double** %arrayidx, align 8 - %arrayidx1 = getelementptr inbounds double* %0, i64 %idxprom - %1 = load double* %arrayidx1, align 8 - %sub = fsub double %1, 1.000000e+00 - %cmp = fcmp olt double %sub, 0.000000e+00 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %sw.bb - br label %return - -if.end: ; preds = %sw.bb - br label %sw.bb2 - -sw.bb2: ; preds = %if.end, %entry - %idxprom3 = sext i32 %i to i64 - %arrayidx4 = getelementptr inbounds double** %p, i64 0 - %2 = load double** %arrayidx4, align 8 - %arrayidx5 = getelementptr inbounds double* %2, i64 %idxprom3 - %3 = load double* %arrayidx5, align 8 -; CHECK: sw.bb2: -; CHECK-NEXT-NOT: sext -; CHECK-NEXT: phi double [ -; CHECK-NOT: load - %sub6 = fsub double 3.000000e+00, %3 - br label %return - -sw.default: ; preds = %entry - br label %return - -return: ; preds = %sw.default, %sw.bb2, %if.then - %retval.0 = phi double [ 0.000000e+00, %sw.default ], [ %sub6, %sw.bb2 ], [ %sub, %if.then ] - ret double %retval.0 -} -- 2.7.4