From 8716b58583cf5e753f2dc2cfb214cdf24a839c43 Mon Sep 17 00:00:00 2001
From: Chad Rosier <mcrosier@codeaurora.org>
Date: Thu, 13 Nov 2014 22:54:59 +0000
Subject: [PATCH] Revert "[GVN] Perform Scalar PRE on gep indices that feed
 loads before doing Load PRE."

This reverts commit r221924.  It appears the commit was a bit premature and is causing
bot failures that need further investigation.

llvm-svn: 221939
---
 llvm/lib/Transforms/Scalar/GVN.cpp       | 332 +++++++++++++++----------------
 llvm/test/Transforms/GVN/pre-gep-load.ll |  49 -----
 2 files changed, 166 insertions(+), 215 deletions(-)
 delete mode 100644 llvm/test/Transforms/GVN/pre-gep-load.ll
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 5279216..7dba4e2 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -20,7 +20,6 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
@@ -710,7 +709,6 @@ namespace {
     void dump(DenseMap<uint32_t, Value*> &d);
     bool iterateOnFunction(Function &F);
     bool performPRE(Function &F);
-    bool performScalarPRE(Instruction *I);
     Value *findLeader(const BasicBlock *BB, uint32_t num);
     void cleanupGlobalSets();
     void verifyRemoved(const Instruction *I) const;
@@ -1731,14 +1729,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
     return false;
   }
 
-  // If this load follows a GEP, see if we can PRE the indices before analyzing.
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) {
-    for(GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
-        OE = GEP->idx_end(); OI != OE; ++OI)
-      if (Instruction *I = dyn_cast<Instruction>(OI->get()))
-        performScalarPRE(I);
-  }
-
   // Step 2: Analyze the availability of the load
   AvailValInBlkVect ValuesPerBlock;
   UnavailBlkVect UnavailableBlocks;
@@ -2441,180 +2431,175 @@ bool GVN::processBlock(BasicBlock *BB) {
   return ChangedFunction;
 }
 
-bool GVN::performScalarPRE(Instruction *CurInst) {
+/// performPRE - Perform a purely local form of PRE that looks for diamond
+/// control flow patterns and attempts to perform simple PRE at the join point.
+bool GVN::performPRE(Function &F) {
+  bool Changed = false;
   SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
+  for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
+    // Nothing to PRE in the entry block.
+    if (CurrentBlock == &F.getEntryBlock()) continue;
 
-  if (isa<AllocaInst>(CurInst) ||
-      isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
-      CurInst->getType()->isVoidTy() ||
-      CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
-      isa<DbgInfoIntrinsic>(CurInst))
-    return false;
-
-  // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
-  // sinking the compare again, and it would force the code generator to
-  // move the i1 from processor flags or predicate registers into a general
-  // purpose register.
-  if (isa<CmpInst>(CurInst))
-    return false;
-
-  // We don't currently value number ANY inline asm calls.
-  if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
-    if (CallI->isInlineAsm())
-      return false;
+    // Don't perform PRE on a landing pad.
+    if (CurrentBlock->isLandingPad()) continue;
 
-  uint32_t ValNo = VN.lookup(CurInst);
-
-  // Look for the predecessors for PRE opportunities.  We're
-  // only trying to solve the basic diamond case, where
-  // a value is computed in the successor and one predecessor,
-  // but not the other.  We also explicitly disallow cases
-  // where the successor is its own predecessor, because they're
-  // more complicated to get right.
-  unsigned NumWith = 0;
-  unsigned NumWithout = 0;
-  BasicBlock *PREPred = nullptr;
-  BasicBlock *CurrentBlock = CurInst->getParent();
-  predMap.clear();
-
-  for (pred_iterator PI = pred_begin(CurrentBlock),
-       PE = pred_end(CurrentBlock); PI != PE; ++PI) {
-    BasicBlock *P = *PI;
-    // We're not interested in PRE where the block is its
-    // own predecessor, or in blocks with predecessors
-    // that are not reachable.
-    if (P == CurrentBlock) {
-      NumWithout = 2;
-      break;
-    } else if (!DT->isReachableFromEntry(P))  {
-      NumWithout = 2;
-      break;
-    }
+    for (BasicBlock::iterator BI = CurrentBlock->begin(),
+         BE = CurrentBlock->end(); BI != BE; ) {
+      Instruction *CurInst = BI++;
 
-    Value* predV = findLeader(P, ValNo);
-    if (!predV) {
-      predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
-      PREPred = P;
-        ++NumWithout;
-    } else if (predV == CurInst) {
-      /* CurInst dominates this predecessor. */
-      NumWithout = 2;
-      break;
-    } else {
-      predMap.push_back(std::make_pair(predV, P));
-      ++NumWith;
-    }
-  }
+      if (isa<AllocaInst>(CurInst) ||
+          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
+          CurInst->getType()->isVoidTy() ||
+          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
+          isa<DbgInfoIntrinsic>(CurInst))
+        continue;
 
-  // Don't do PRE when it might increase code size, i.e. when
-  // we would need to insert instructions in more than one pred.
-  if (NumWithout != 1 || NumWith == 0)
-    return false;
+      // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
+      // sinking the compare again, and it would force the code generator to
+      // move the i1 from processor flags or predicate registers into a general
+      // purpose register.
+      if (isa<CmpInst>(CurInst))
+        continue;
 
-  // Don't do PRE across indirect branch.
-  if (isa<IndirectBrInst>(PREPred->getTerminator()))
-    return false;
+      // We don't currently value number ANY inline asm calls.
+      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
+        if (CallI->isInlineAsm())
+          continue;
 
-  // We can't do PRE safely on a critical edge, so instead we schedule
-  // the edge to be split and perform the PRE the next time we iterate
-  // on the function.
-  unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
-  if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
-    toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
-    return false;
-  }
+      uint32_t ValNo = VN.lookup(CurInst);
+
+      // Look for the predecessors for PRE opportunities.  We're
+      // only trying to solve the basic diamond case, where
+      // a value is computed in the successor and one predecessor,
+      // but not the other.  We also explicitly disallow cases
+      // where the successor is its own predecessor, because they're
+      // more complicated to get right.
+      unsigned NumWith = 0;
+      unsigned NumWithout = 0;
+      BasicBlock *PREPred = nullptr;
+      predMap.clear();
+
+      for (pred_iterator PI = pred_begin(CurrentBlock),
+           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+        BasicBlock *P = *PI;
+        // We're not interested in PRE where the block is its
+        // own predecessor, or in blocks with predecessors
+        // that are not reachable.
+        if (P == CurrentBlock) {
+          NumWithout = 2;
+          break;
+        } else if (!DT->isReachableFromEntry(P))  {
+          NumWithout = 2;
+          break;
+        }
 
-  // Instantiate the expression in the predecessor that lacked it.
-  // Because we are going top-down through the block, all value numbers
-  // will be available in the predecessor by the time we need them.  Any
-  // that weren't originally present will have been instantiated earlier
-  // in this loop.
-  Instruction *PREInstr = CurInst->clone();
-  bool success = true;
-  for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
-    Value *Op = PREInstr->getOperand(i);
-    if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
-      continue;
+        Value* predV = findLeader(P, ValNo);
+        if (!predV) {
+          predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
+          PREPred = P;
+          ++NumWithout;
+        } else if (predV == CurInst) {
+          /* CurInst dominates this predecessor. */
+          NumWithout = 2;
+          break;
+        } else {
+          predMap.push_back(std::make_pair(predV, P));
+          ++NumWith;
+        }
+      }
 
-    if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
-      PREInstr->setOperand(i, V);
-    } else {
-      success = false;
-      break;
-    }
-  }
+      // Don't do PRE when it might increase code size, i.e. when
+      // we would need to insert instructions in more than one pred.
+      if (NumWithout != 1 || NumWith == 0)
+        continue;
 
-  // Fail out if we encounter an operand that is not available in
-  // the PRE predecessor.  This is typically because of loads which
-  // are not value numbered precisely.
-  if (!success) {
-    DEBUG(verifyRemoved(PREInstr));
-    delete PREInstr;
-    return false;
-  }
+      // Don't do PRE across indirect branch.
+      if (isa<IndirectBrInst>(PREPred->getTerminator()))
+        continue;
 
-  PREInstr->insertBefore(PREPred->getTerminator());
-  PREInstr->setName(CurInst->getName() + ".pre");
-  PREInstr->setDebugLoc(CurInst->getDebugLoc());
-  VN.add(PREInstr, ValNo);
-  ++NumGVNPRE;
+      // We can't do PRE safely on a critical edge, so instead we schedule
+      // the edge to be split and perform the PRE the next time we iterate
+      // on the function.
+      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
+      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
+        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
+        continue;
+      }
 
-  // Update the availability map to include the new instruction.
-  addToLeaderTable(ValNo, PREInstr, PREPred);
+      // Instantiate the expression in the predecessor that lacked it.
+      // Because we are going top-down through the block, all value numbers
+      // will be available in the predecessor by the time we need them.  Any
+      // that weren't originally present will have been instantiated earlier
+      // in this loop.
+      Instruction *PREInstr = CurInst->clone();
+      bool success = true;
+      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
+        Value *Op = PREInstr->getOperand(i);
+        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
+          continue;
 
-  // Create a PHI to make the value available in this block.
-  PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(),
-                                 CurInst->getName() + ".pre-phi",
-                                 CurrentBlock->begin());
-  for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
-    if (Value *V = predMap[i].first)
-      Phi->addIncoming(V, predMap[i].second);
-    else
-      Phi->addIncoming(PREInstr, PREPred);
-  }
-
-  VN.add(Phi, ValNo);
-  addToLeaderTable(ValNo, Phi, CurrentBlock);
-  Phi->setDebugLoc(CurInst->getDebugLoc());
-  CurInst->replaceAllUsesWith(Phi);
-  if (Phi->getType()->getScalarType()->isPointerTy()) {
-    // Because we have added a PHI-use of the pointer value, it has now
-    // "escaped" from alias analysis' perspective.  We need to inform
-    // AA of this.
-    for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
-         ++ii) {
-      unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
-      VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
-    }
+        if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
+          PREInstr->setOperand(i, V);
+        } else {
+          success = false;
+          break;
+        }
+      }
 
-    if (MD)
-      MD->invalidateCachedPointerInfo(Phi);
-  }
-  VN.erase(CurInst);
-  removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
+      // Fail out if we encounter an operand that is not available in
+      // the PRE predecessor.  This is typically because of loads which
+      // are not value numbered precisely.
+      if (!success) {
+        DEBUG(verifyRemoved(PREInstr));
+        delete PREInstr;
+        continue;
+      }
 
-  DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
-  if (MD) MD->removeInstruction(CurInst);
-  DEBUG(verifyRemoved(CurInst));
-  CurInst->eraseFromParent();
-  return true;
-}
+      PREInstr->insertBefore(PREPred->getTerminator());
+      PREInstr->setName(CurInst->getName() + ".pre");
+      PREInstr->setDebugLoc(CurInst->getDebugLoc());
+      VN.add(PREInstr, ValNo);
+      ++NumGVNPRE;
+
+      // Update the availability map to include the new instruction.
+      addToLeaderTable(ValNo, PREInstr, PREPred);
+
+      // Create a PHI to make the value available in this block.
+      PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(),
+                                     CurInst->getName() + ".pre-phi",
+                                     CurrentBlock->begin());
+      for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
+        if (Value *V = predMap[i].first)
+          Phi->addIncoming(V, predMap[i].second);
+        else
+          Phi->addIncoming(PREInstr, PREPred);
+      }
 
-/// performPRE - Perform a purely local form of PRE that looks for diamond
-/// control flow patterns and attempts to perform simple PRE at the join point.
-bool GVN::performPRE(Function &F) {
-  bool Changed = false;
-  for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
-    // Nothing to PRE in the entry block.
-    if (CurrentBlock == &F.getEntryBlock()) continue;
+      VN.add(Phi, ValNo);
+      addToLeaderTable(ValNo, Phi, CurrentBlock);
+      Phi->setDebugLoc(CurInst->getDebugLoc());
+      CurInst->replaceAllUsesWith(Phi);
+      if (Phi->getType()->getScalarType()->isPointerTy()) {
+        // Because we have added a PHI-use of the pointer value, it has now
+        // "escaped" from alias analysis' perspective.  We need to inform
+        // AA of this.
+        for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
+             ++ii) {
+          unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+          VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
+        }
 
-    // Don't perform PRE on a landing pad.
-    if (CurrentBlock->isLandingPad()) continue;
+        if (MD)
+          MD->invalidateCachedPointerInfo(Phi);
+      }
+      VN.erase(CurInst);
+      removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
 
-    for (BasicBlock::iterator BI = CurrentBlock->begin(),
-         BE = CurrentBlock->end(); BI != BE; ) {
-      Instruction *CurInst = BI++;
-      Changed = performScalarPRE(CurInst);
+      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
+      if (MD) MD->removeInstruction(CurInst);
+      DEBUG(verifyRemoved(CurInst));
+      CurInst->eraseFromParent();
+      Changed = true;
     }
   }
 
@@ -2652,11 +2637,26 @@ bool GVN::iterateOnFunction(Function &F) {
 
   // Top-down walk of the dominator tree
   bool Changed = false;
+#if 0
   // Needed for value numbering with phi construction to work.
   ReversePostOrderTraversal<Function*> RPOT(&F);
   for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
        RE = RPOT.end(); RI != RE; ++RI)
     Changed |= processBlock(*RI);
+#else
+  // Save the blocks this function have before transformation begins. GVN may
+  // split critical edge, and hence may invalidate the RPO/DT iterator.
+  //
+  std::vector<BasicBlock *> BBVect;
+  BBVect.reserve(256);
+  for (DomTreeNode *X : depth_first(DT->getRootNode()))
+    BBVect.push_back(X->getBlock());
+
+  for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
+       I != E; I++)
+    Changed |= processBlock(*I);
+#endif
+
   return Changed;
 }
 
diff --git a/llvm/test/Transforms/GVN/pre-gep-load.ll b/llvm/test/Transforms/GVN/pre-gep-load.ll
deleted file mode 100644
index 3ee3a37..0000000
--- a/llvm/test/Transforms/GVN/pre-gep-load.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
-target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64--linux-gnu"
-
-define double @foo(i32 %stat, i32 %i, double** %p) {
-; CHECK-LABEL: @foo(
-entry:
-  switch i32 %stat, label %sw.default [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb2
-  ]
-
-sw.bb:                                            ; preds = %entry, %entry
-  %idxprom = sext i32 %i to i64
-  %arrayidx = getelementptr inbounds double** %p, i64 0
-  %0 = load double** %arrayidx, align 8
-  %arrayidx1 = getelementptr inbounds double* %0, i64 %idxprom
-  %1 = load double* %arrayidx1, align 8
-  %sub = fsub double %1, 1.000000e+00
-  %cmp = fcmp olt double %sub, 0.000000e+00
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:                                          ; preds = %sw.bb
-  br label %return
-
-if.end:                                           ; preds = %sw.bb
-  br label %sw.bb2
-
-sw.bb2:                                           ; preds = %if.end, %entry
-  %idxprom3 = sext i32 %i to i64
-  %arrayidx4 = getelementptr inbounds double** %p, i64 0
-  %2 = load double** %arrayidx4, align 8
-  %arrayidx5 = getelementptr inbounds double* %2, i64 %idxprom3
-  %3 = load double* %arrayidx5, align 8
-; CHECK: sw.bb2:
-; CHECK-NEXT-NOT: sext
-; CHECK-NEXT: phi double [
-; CHECK-NOT: load
-  %sub6 = fsub double 3.000000e+00, %3
-  br label %return
-
-sw.default:                                       ; preds = %entry
-  br label %return
-
-return:                                           ; preds = %sw.default, %sw.bb2, %if.then
-  %retval.0 = phi double [ 0.000000e+00, %sw.default ], [ %sub6, %sw.bb2 ], [ %sub, %if.then ]
-  ret double %retval.0
-}
-- 
2.7.4