From a30aba7a01b156de75a77d0b91ed5728b6c6f13c Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Mon, 3 Dec 2012 21:06:35 +0000
Subject: [PATCH] Add initial support for IF-conversion. This patch implements
 the first 1/3, which is the legality of the if-conversion transformation. The
 next step is to implement the cost-model for the if-converted code as well as
 the vectorization itself.

llvm-svn: 169152
---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 387 ++++++++++++++++--------
 1 file changed, 253 insertions(+), 134 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 33b76ba..8e2538f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -80,6 +80,10 @@ static cl::opt<unsigned>
 VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
           cl::desc("Set the default vectorization width. Zero is autoselect."));
 
+static cl::opt<bool>
+EnableIfConversion("enable-if-conversion", cl::init(false), cl::Hidden,
+                   cl::desc("Enable if-conversion during vectorization."));
+
 /// We don't vectorize loops with a known constant trip count below this number.
 const unsigned TinyTripCountThreshold = 16;
 
@@ -219,16 +223,17 @@ private:
 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
 ///   will change the order of memory accesses in a way that will change the
 ///   correctness of the program.
-/// * Scalars checks - The code in canVectorizeBlock checks for a number
-///   of different conditions, such as the availability of a single induction
-///   variable, that all types are supported and vectorize-able, etc.
-/// This code reflects the capabilities of SingleBlockLoopVectorizer.
+/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
+/// checks for a number of different conditions, such as the availability of a
+/// single induction variable, that all types are supported and vectorize-able,
+/// etc. This code reflects the capabilities of SingleBlockLoopVectorizer.
 /// This class is also used by SingleBlockLoopVectorizer for identifying
 /// induction variable and the different reduction variables.
 class LoopVectorizationLegality {
 public:
-  LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl):
-  TheLoop(Lp), SE(Se), DL(Dl), Induction(0) { }
+  LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl,
+                              DominatorTree *Dt):
+  TheLoop(Lp), SE(Se), DL(Dl), DT(Dt), Induction(0) { }
 
   /// This represents the kinds of reductions that we support.
   enum ReductionKind {
@@ -277,7 +282,7 @@ public:
       const SCEV *Sc = SE->getSCEV(Ptr);
       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
       assert(AR && "Invalid addrec expression");
-      const SCEV *Ex = SE->getExitCount(Lp, Lp->getHeader());
+      const SCEV *Ex = SE->getExitCount(Lp, Lp->getLoopLatch());
       const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
       Pointers.push_back(Ptr);
       Starts.push_back(AR->getStart());
@@ -334,13 +339,28 @@ private:
   /// Check if a single basic block loop is vectorizable.
   /// At this point we know that this is a loop with a constant trip count
   /// and we only need to check individual instructions.
-  bool canVectorizeBlock(BasicBlock &BB);
+  bool canVectorizeInstrs(BasicBlock &BB);
 
   /// When we vectorize loops we may change the order in which
   /// we read and write from memory. This method checks if it is
   /// legal to vectorize the code, considering only memory constrains.
   /// Returns true if BB is vectorizable
-  bool canVectorizeMemory(BasicBlock &BB);
+  bool canVectorizeMemory();
+
+  /// Return true if we can vectorize this loop using the IF-conversion
+  /// transformation.
+  bool canVectorizeWithIfConvert();
+
+  /// Collect the variables that need to stay uniform after vectorization.
+  void collectLoopUniforms();
+
+  /// Return true if the block BB needs to be predicated in order for the loop
+  /// to be vectorized.
+  bool blockNeedsPredication(BasicBlock *BB);
+
+  /// return true if all of the instructions in the block can be speculatively
+  /// executed.
+  bool blockCanBePredicated(BasicBlock *BB);
 
   /// Returns True, if 'Phi' is the kind of reduction variable for type
   /// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
@@ -359,6 +379,8 @@ private:
   ScalarEvolution *SE;
   /// DataLayout analysis.
   DataLayout *DL;
+  // Dominators.
+  DominatorTree *DT;
 
   //  ---  vectorization state --- //
 
@@ -458,7 +480,7 @@ struct LoopVectorize : public LoopPass {
           L->getHeader()->getParent()->getName() << "\"\n");
 
     // Check if it is legal to vectorize the loop.
-    LoopVectorizationLegality LVL(L, SE, DL);
+    LoopVectorizationLegality LVL(L, SE, DL, DT);
     if (!LVL.canVectorize()) {
       DEBUG(dbgs() << "LV: Not vectorizing.\n");
       return false;
@@ -1423,41 +1445,91 @@ void SingleBlockLoopVectorizer::updateAnalysis() {
   DEBUG(DT->verifyAnalysis());
 }
 
+
+bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
+  if (!EnableIfConversion)
+    return false;
+
+  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
+  std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
+
+  // Collect the blocks that need predication.
+  for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
+    BasicBlock *BB = LoopBlocks[i];
+
+    // We must have at most two predecessors because we need to convert
+    // all PHIs to selects.
+    unsigned Preds = std::distance(pred_begin(BB), pred_end(BB));
+    if (Preds > 2)
+      return false;
+
+    // We must be able to predicate all blocks that needs to be predicated.
+    if (blockNeedsPredication(BB) && !blockCanBePredicated(BB))
+      return false;
+  }
+
+  // We can if-convert this loop.
+  return true;
+}
+
 bool LoopVectorizationLegality::canVectorize() {
   assert(TheLoop->getLoopPreheader() && "No preheader!!");
 
-  // We can only vectorize single basic block loops.
+  // We can only vectorize innermost loops.
+  if (TheLoop->getSubLoopsVector().size())
+    return false;
+
+  // We must have a single backedge.
+  if (TheLoop->getNumBackEdges() != 1)
+    return false;
+
+  // We must have a single exiting block.
+  if (!TheLoop->getExitingBlock())
+    return false;
+
   unsigned NumBlocks = TheLoop->getNumBlocks();
-  if (NumBlocks != 1) {
-    DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n");
+
+  // Check if we can if-convert non single-bb loops.
+  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
+    DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
     return false;
   }
 
   // We need to have a loop header.
-  BasicBlock *BB = TheLoop->getHeader();
-  DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n");
+  BasicBlock *Header = TheLoop->getHeader();
+  BasicBlock *Latch = TheLoop->getLoopLatch();
+  DEBUG(dbgs() << "LV: Found a loop: " << Header->getName() << "\n");
 
   // ScalarEvolution needs to be able to find the exit count.
-  const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
+  const SCEV *ExitCount = SE->getExitCount(TheLoop, Latch);
   if (ExitCount == SE->getCouldNotCompute()) {
     DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
     return false;
   }
 
   // Do not loop-vectorize loops with a tiny trip count.
-  unsigned TC = SE->getSmallConstantTripCount(TheLoop, BB);
+  unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch);
   if (TC > 0u && TC < TinyTripCountThreshold) {
     DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
           "This loop is not worth vectorizing.\n");
     return false;
   }
 
+  // Check if we can vectorize the instructions and CFG in this loop.
+  if (!canVectorizeInstrs(*Header)) {
+    DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
+    return false;
+  }
+
   // Go over each instruction and look at memory deps.
-  if (!canVectorizeBlock(*BB)) {
-    DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
+  if (!canVectorizeMemory()) {
+    DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
     return false;
   }
 
+  // Collect all of the variables that remain  uniform after vectorization.
+  collectLoopUniforms();
+
   DEBUG(dbgs() << "LV: We can vectorize this loop" <<
         (PtrRtCheck.Need ? " (with a runtime bound check)" : "")
         <<"!\n");
@@ -1468,122 +1540,138 @@ bool LoopVectorizationLegality::canVectorize() {
   return true;
 }
 
-bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
-
+bool LoopVectorizationLegality::canVectorizeInstrs(BasicBlock &BB) {
   BasicBlock *PreHeader = TheLoop->getLoopPreheader();
+  BasicBlock *Header = TheLoop->getHeader();
 
-  // Scan the instructions in the block and look for hazards.
-  for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
-    Instruction *I = it;
+  // For each block in the loop
+  for (Loop::block_iterator bb = TheLoop->block_begin(),
+       be = TheLoop->block_end(); bb != be; ++bb) {
 
-    if (PHINode *Phi = dyn_cast<PHINode>(I)) {
-      // This should not happen because the loop should be normalized.
-      if (Phi->getNumIncomingValues() != 2) {
-        DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
-        return false;
-      }
+    // Scan the instructions in the block and look for hazards.
+    for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
+      Instruction *I = it;
 
-      // This is the value coming from the preheader.
-      Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
+      if (PHINode *Phi = dyn_cast<PHINode>(I)) {
+        // This should not happen because the loop should be normalized.
+        if (Phi->getNumIncomingValues() != 2) {
+          DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+          return false;
+        }
 
-      // We only look at integer and pointer phi nodes.
-      if (Phi->getType()->isPointerTy() && isInductionVariable(Phi)) {
-        DEBUG(dbgs() << "LV: Found a pointer induction variable.\n");
-        Inductions[Phi] = StartValue;
-        continue;
-      } else if (!Phi->getType()->isIntegerTy()) {
-        DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
-        return false;
-      }
+        // If this PHINode is not in the header block, then we know that we
+        // can convert it to select during if-conversion.
+        if (*bb != Header) {
+          continue;
+        }
 
-      // Handle integer PHIs:
-      if (isInductionVariable(Phi)) {
-        if (Induction) {
-          DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
+        // This is the value coming from the preheader.
+        Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
+
+        // We only look at integer and pointer phi nodes.
+        if (Phi->getType()->isPointerTy() && isInductionVariable(Phi)) {
+          DEBUG(dbgs() << "LV: Found a pointer induction variable.\n");
+          Inductions[Phi] = StartValue;
+          continue;
+        } else if (!Phi->getType()->isIntegerTy()) {
+          DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
           return false;
         }
-        DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n");
-        Induction = Phi;
-        Inductions[Phi] = StartValue;
-        continue;
-      }
-      if (AddReductionVar(Phi, IntegerAdd)) {
-        DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
-        continue;
-      }
-      if (AddReductionVar(Phi, IntegerMult)) {
-        DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
-        continue;
-      }
-      if (AddReductionVar(Phi, IntegerOr)) {
-        DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
-        continue;
-      }
-      if (AddReductionVar(Phi, IntegerAnd)) {
-        DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
-        continue;
-      }
-      if (AddReductionVar(Phi, IntegerXor)) {
-        DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
-        continue;
-      }
 
-      DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
-      return false;
-    }// end of PHI handling
+        // Handle integer PHIs:
+        if (isInductionVariable(Phi)) {
+          if (Induction) {
+            DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
+            return false;
+          }
+          DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n");
+          Induction = Phi;
+          Inductions[Phi] = StartValue;
+          continue;
+        }
+        if (AddReductionVar(Phi, IntegerAdd)) {
+          DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
+        if (AddReductionVar(Phi, IntegerMult)) {
+          DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
+        if (AddReductionVar(Phi, IntegerOr)) {
+          DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
+        if (AddReductionVar(Phi, IntegerAnd)) {
+          DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
+        if (AddReductionVar(Phi, IntegerXor)) {
+          DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
 
-    // We still don't handle functions.
-    CallInst *CI = dyn_cast<CallInst>(I);
-    if (CI) {
-      DEBUG(dbgs() << "LV: Found a call site.\n");
-      return false;
-    }
+        DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
+        return false;
+      }// end of PHI handling
 
-    // We do not re-vectorize vectors.
-    if (!VectorType::isValidElementType(I->getType()) &&
-        !I->getType()->isVoidTy()) {
-      DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
-      return false;
-    }
+      // We still don't handle functions.
+      CallInst *CI = dyn_cast<CallInst>(I);
+      if (CI) {
+        DEBUG(dbgs() << "LV: Found a call site.\n");
+        return false;
+      }
 
-    // Reduction instructions are allowed to have exit users.
-    // All other instructions must not have external users.
-    if (!AllowedExit.count(I))
-      //Check that all of the users of the loop are inside the BB.
-      for (Value::use_iterator it = I->use_begin(), e = I->use_end();
-           it != e; ++it) {
-        Instruction *U = cast<Instruction>(*it);
-        // This user may be a reduction exit value.
-        BasicBlock *Parent = U->getParent();
-        if (Parent != &BB) {
-          DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
-          return false;
+      // We do not re-vectorize vectors.
+      if (!VectorType::isValidElementType(I->getType()) &&
+          !I->getType()->isVoidTy()) {
+        DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
+        return false;
+      }
+
+      // Reduction instructions are allowed to have exit users.
+      // All other instructions must not have external users.
+      if (!AllowedExit.count(I))
+        //Check that all of the users of the loop are inside the BB.
+        for (Value::use_iterator it = I->use_begin(), e = I->use_end();
+             it != e; ++it) {
+          Instruction *U = cast<Instruction>(*it);
+          // This user may be a reduction exit value.
+          if (!TheLoop->contains(U)) {
+            DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+            return false;
+          }
         }
-    }
-  } // next instr.
+    } // next instr.
+
+  }
 
   if (!Induction) {
     DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
     assert(getInductionVars()->size() && "No induction variables");
   }
 
-  // Don't vectorize if the memory dependencies do not allow vectorization.
-  if (!canVectorizeMemory(BB))
-    return false;
+  return true;
+}
 
+void LoopVectorizationLegality::collectLoopUniforms() {
   // We now know that the loop is vectorizable!
   // Collect variables that will remain uniform after vectorization.
   std::vector<Value*> Worklist;
 
+  BasicBlock *Latch = TheLoop->getLoopLatch();
+
   // Start with the conditional branch and walk up the block.
-  Worklist.push_back(BB.getTerminator()->getOperand(0));
+  Worklist.push_back(Latch->getTerminator()->getOperand(0));
 
   while (Worklist.size()) {
     Instruction *I = dyn_cast<Instruction>(Worklist.back());
     Worklist.pop_back();
 
-    // Look at instructions inside this block. Stop when reaching PHI nodes.
-    if (!I || I->getParent() != &BB || isa<PHINode>(I))
+    // Look at instructions inside this loop.
+    // Stop when reaching PHI nodes.
+    // TODO: we need to prevent loops but we do need to follow PHIs inside this
+    // loop.
+    if (!I || !TheLoop->contains(I) || isa<PHINode>(I))
       continue;
 
     // This is a known uniform.
@@ -1594,11 +1682,9 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
       Worklist.push_back(I->getOperand(i));
     }
   }
-
-  return true;
 }
 
-bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
+bool LoopVectorizationLegality::canVectorizeMemory() {
   typedef SmallVector<Value*, 16> ValueVector;
   typedef SmallPtrSet<Value*, 16> ValueSet;
   // Holds the Load and Store *instructions*.
@@ -1607,35 +1693,40 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
   PtrRtCheck.Pointers.clear();
   PtrRtCheck.Need = false;
 
-  // Scan the BB and collect legal loads and stores.
-  for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
-    Instruction *I = it;
-
-    // If this is a load, save it. If this instruction can read from memory
-    // but is not a load, then we quit. Notice that we don't handle function
-    // calls that read or write.
-    if (I->mayReadFromMemory()) {
-      LoadInst *Ld = dyn_cast<LoadInst>(I);
-      if (!Ld) return false;
-      if (!Ld->isSimple()) {
-        DEBUG(dbgs() << "LV: Found a non-simple load.\n");
-        return false;
+  // For each block.
+  for (Loop::block_iterator bb = TheLoop->block_begin(),
+       be = TheLoop->block_end(); bb != be; ++bb) {
+
+    // Scan the BB and collect legal loads and stores.
+    for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+         ++it) {
+
+      // If this is a load, save it. If this instruction can read from memory
+      // but is not a load, then we quit. Notice that we don't handle function
+      // calls that read or write.
+      if (it->mayReadFromMemory()) {
+        LoadInst *Ld = dyn_cast<LoadInst>(it);
+        if (!Ld) return false;
+        if (!Ld->isSimple()) {
+          DEBUG(dbgs() << "LV: Found a non-simple load.\n");
+          return false;
+        }
+        Loads.push_back(Ld);
+        continue;
       }
-      Loads.push_back(Ld);
-      continue;
-    }
 
-    // Save store instructions. Abort if other instructions write to memory.
-    if (I->mayWriteToMemory()) {
-      StoreInst *St = dyn_cast<StoreInst>(I);
-      if (!St) return false;
-      if (!St->isSimple()) {
-        DEBUG(dbgs() << "LV: Found a non-simple store.\n");
-        return false;
+      // Save 'store' instructions. Abort if other instructions write to memory.
+      if (it->mayWriteToMemory()) {
+        StoreInst *St = dyn_cast<StoreInst>(it);
+        if (!St) return false;
+        if (!St->isSimple()) {
+          DEBUG(dbgs() << "LV: Found a non-simple store.\n");
+          return false;
+        }
+        Stores.push_back(St);
       }
-      Stores.push_back(St);
-    }
-  } // next instr.
+    } // next instr.
+  } // next block.
 
   // Now we have two lists that hold the loads and the stores.
   // Next, we find the pointers that they use.
@@ -1908,6 +1999,34 @@ bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
   return (C->getValue()->equalsInt(Size));
 }
 
+bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB)  {
+  assert(TheLoop->contains(BB) && "Unknown block used");
+
+  // Blocks that do not dominate the latch need predication.
+  BasicBlock* Latch = TheLoop->getLoopLatch();
+  return !DT->dominates(BB, Latch);
+}
+
+bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
+  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+    // We don't predicate loads/stores at the moment.
+    if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
+      return false;
+
+    // The isntructions below can trap.
+    switch (it->getOpcode()) {
+      default: continue;
+      case Instruction::UDiv:
+      case Instruction::SDiv:
+      case Instruction::URem:
+      case Instruction::SRem:
+        return false;
+    }
+  }
+
+  return true;
+}
+
 bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
   const SCEV *PhiScev = SE->getSCEV(Ptr);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
-- 
2.7.4