From 63847d04e79549f345c29d41f7d6da24cb18687c Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop@codeaurora.org>
Date: Thu, 14 Jul 2016 12:18:53 +0000
Subject: [PATCH] code hoisting pass based on GVN

This pass hoists duplicated computations in the program. The primary goal of
gvn-hoist is to reduce the size of functions before inline heuristics to reduce
the total cost of function inlining.

Pass written by Sebastian Pop, Aditya Kumar, Xiaoyu Hu, and Brian Rzycki.
Important algorithmic contributions by Daniel Berlin under the form of reviews.

Differential Revision: http://reviews.llvm.org/D19338

llvm-svn: 275401
---
 llvm/include/llvm/InitializePasses.h           |   1 +
 llvm/include/llvm/LinkAllPasses.h              |   1 +
 llvm/include/llvm/Transforms/Scalar.h          |   7 +
 llvm/include/llvm/Transforms/Scalar/GVN.h      |  15 +-
 llvm/lib/Passes/PassRegistry.def               |   1 +
 llvm/lib/Transforms/IPO/PassManagerBuilder.cpp |   1 +
 llvm/lib/Transforms/Scalar/CMakeLists.txt      |   1 +
 llvm/lib/Transforms/Scalar/GVNHoist.cpp        | 820 +++++++++++++++++++++++++
 llvm/lib/Transforms/Scalar/Scalar.cpp          |   5 +
 llvm/test/Transforms/GVN/hoist-pr20242.ll      |  74 +++
 llvm/test/Transforms/GVN/hoist-pr22005.ll      |  30 +
 llvm/test/Transforms/GVN/hoist.ll              | 691 +++++++++++++++++++++
 12 files changed, 1643 insertions(+), 4 deletions(-)
 create mode 100644 llvm/lib/Transforms/Scalar/GVNHoist.cpp
 create mode 100644 llvm/test/Transforms/GVN/hoist-pr20242.ll
 create mode 100644 llvm/test/Transforms/GVN/hoist-pr22005.ll
 create mode 100644 llvm/test/Transforms/GVN/hoist.ll

diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 34b156e..6f8aae7 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -120,6 +120,7 @@ void initializeEarlyIfConverterPass(PassRegistry&);
 void initializeEdgeBundlesPass(PassRegistry&);
 void initializeEfficiencySanitizerPass(PassRegistry&);
 void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry &);
+void initializeGVNHoistLegacyPassPass(PassRegistry &);
 void initializeExpandISelPseudosPass(PassRegistry&);
 void initializeExpandPostRAPass(PassRegistry&);
 void initializeExternalAAWrapperPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index a12913e..b2721d0 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -160,6 +160,7 @@ namespace {
       (void) llvm::createConstantHoistingPass();
       (void) llvm::createCodeGenPreparePass();
       (void) llvm::createEarlyCSEPass();
+      (void) llvm::createGVNHoistPass();
       (void) llvm::createMergedLoadStoreMotionPass();
       (void) llvm::createGVNPass();
       (void) llvm::createMemCpyOptPass();
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 040e95c..167cc94 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -326,6 +326,13 @@ FunctionPass *createEarlyCSEPass();
 
 //===----------------------------------------------------------------------===//
 //
+// GVNHoist - This pass performs a simple and fast GVN pass over the dominator
+// tree to hoist common expressions from sibling branches.
+//
+FunctionPass *createGVNHoistPass();
+
+//===----------------------------------------------------------------------===//
+//
 // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads
 // are hoisted into the header, while stores sink into the footer.
 //
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
index 9a53ca3..3bb5ec3 100644
--- a/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -58,11 +58,7 @@ public:
   AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
   MemoryDependenceResults &getMemDep() const { return *MD; }
 
-private:
-  friend class gvn::GVNLegacyPass;
-
   struct Expression;
-  friend struct DenseMapInfo<Expression>;
 
   /// This class holds the mapping between values and value numbers.  It is used
   /// as an efficient mechanism to determine the expression-wise equivalence of
@@ -104,6 +100,10 @@ private:
     void verifyRemoved(const Value *) const;
   };
 
+private:
+  friend class gvn::GVNLegacyPass;
+  friend struct DenseMapInfo<Expression>;
+
   MemoryDependenceResults *MD;
   DominatorTree *DT;
   const TargetLibraryInfo *TLI;
@@ -228,6 +228,13 @@ private:
 /// loads are eliminated by the pass.
 FunctionPass *createGVNPass(bool NoLoads = false);
 
+/// \brief A simple and fast domtree-based GVN pass to hoist common expressions
+/// from sibling branches.
+struct GVNHoistPass : PassInfoMixin<GVNHoistPass> {
+  /// \brief Run the pass over the function.
+  PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+};
+
 }
 
 #endif
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index ffa0864..c5f9a3c 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -133,6 +133,7 @@ FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
 FUNCTION_PASS("dce", DCEPass())
 FUNCTION_PASS("dse", DSEPass())
 FUNCTION_PASS("early-cse", EarlyCSEPass())
+FUNCTION_PASS("gvn-hoist", GVNHoistPass())
 FUNCTION_PASS("instcombine", InstCombinePass())
 FUNCTION_PASS("instsimplify", InstSimplifierPass())
 FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index e283d26..c6feb13 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -223,6 +223,7 @@ void PassManagerBuilder::populateFunctionPassManager(
   FPM.add(createCFGSimplificationPass());
   FPM.add(createSROAPass());
   FPM.add(createEarlyCSEPass());
+  FPM.add(createGVNHoistPass());
   FPM.add(createLowerExpectIntrinsicPass());
 }
 
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index ac16203..9f04344 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_library(LLVMScalarOpts
   Float2Int.cpp
   GuardWidening.cpp
   GVN.cpp
+  GVNHoist.cpp
   InductiveRangeCheckElimination.cpp
   IndVarSimplify.cpp
   JumpThreading.cpp
diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
new file mode 100644
index 0000000..c0fd9e2
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -0,0 +1,820 @@
+//===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass hoists expressions from branches to a common dominator. It uses
+// GVN (global value numbering) to discover expressions computing the same
+// values. The primary goal is to reduce the code size, and in some
+// cases reduce critical path (by exposing more ILP).
+// Hoisting may affect the performance in some cases. To mitigate that, hoisting
+// is disabled in the following cases.
+// 1. Scalars across calls.
+// 2. geps when corresponding load/store cannot be hoisted.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
+#include <functional>
+#include <unordered_map>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "gvn-hoist"
+
+STATISTIC(NumHoisted, "Number of instructions hoisted");
+STATISTIC(NumRemoved, "Number of instructions removed");
+STATISTIC(NumLoadsHoisted, "Number of loads hoisted");
+STATISTIC(NumLoadsRemoved, "Number of loads removed");
+STATISTIC(NumStoresHoisted, "Number of stores hoisted");
+STATISTIC(NumStoresRemoved, "Number of stores removed");
+STATISTIC(NumCallsHoisted, "Number of calls hoisted");
+STATISTIC(NumCallsRemoved, "Number of calls removed");
+
+static cl::opt<int>
+    MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1),
+                        cl::desc("Max number of instructions to hoist "
+                                 "(default unlimited = -1)"));
+static cl::opt<int> MaxNumberOfBBSInPath(
+    "gvn-hoist-max-bbs", cl::Hidden, cl::init(4),
+    cl::desc("Max number of basic blocks on the path between "
+             "hoisting locations (default = 4, unlimited = -1)"));
+
+static int HoistedCtr = 0;
+
+namespace {
+
+// Provides a sorting function based on the execution order of two instructions.
+struct SortByDFSIn {
+private:
+  DenseMap<const BasicBlock *, unsigned> &DFSNumber;
+
+public:
+  SortByDFSIn(DenseMap<const BasicBlock *, unsigned> &D) : DFSNumber(D) {}
+
+  // Returns true when A executes before B.
+  bool operator()(const Instruction *A, const Instruction *B) const {
+    assert(A != B);
+    const BasicBlock *BA = A->getParent();
+    const BasicBlock *BB = B->getParent();
+    unsigned NA = DFSNumber[BA];
+    unsigned NB = DFSNumber[BB];
+    if (NA < NB)
+      return true;
+    if (NA == NB) {
+      // Sort them in the order they occur in the same basic block.
+      BasicBlock::const_iterator AI(A), BI(B);
+      return std::distance(AI, BI) < 0;
+    }
+    return false;
+  }
+};
+
+// A map from a VN (value number) to all the instructions with that VN.
+typedef DenseMap<unsigned, SmallVector<Instruction *, 4>> VNtoInsns;
+
+// Records all scalar instructions candidate for code hoisting.
+class InsnInfo {
+  VNtoInsns VNtoScalars;
+
+public:
+  // Inserts I and its value number in VNtoScalars.
+  void insert(Instruction *I, GVN::ValueTable &VN) {
+    // Scalar instruction.
+    unsigned V = VN.lookupOrAdd(I);
+    VNtoScalars[V].push_back(I);
+  }
+
+  const VNtoInsns &getVNTable() const { return VNtoScalars; }
+};
+
+// Records all load instructions candidate for code hoisting.
+class LoadInfo {
+  VNtoInsns VNtoLoads;
+
+public:
+  // Insert Load and the value number of its memory address in VNtoLoads.
+  void insert(LoadInst *Load, GVN::ValueTable &VN) {
+    if (Load->isSimple()) {
+      unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
+      VNtoLoads[V].push_back(Load);
+    }
+  }
+
+  const VNtoInsns &getVNTable() const { return VNtoLoads; }
+};
+
+// Records all store instructions candidate for code hoisting.
+class StoreInfo {
+  VNtoInsns VNtoStores;
+
+public:
+  // Insert the Store and a hash number of the store address and the stored
+  // value in VNtoStores.
+  void insert(StoreInst *Store, GVN::ValueTable &VN) {
+    if (!Store->isSimple())
+      return;
+    // Hash the store address and the stored value.
+    Value *Ptr = Store->getPointerOperand();
+    Value *Val = Store->getValueOperand();
+    VNtoStores[hash_combine(VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val))]
+        .push_back(Store);
+  }
+
+  const VNtoInsns &getVNTable() const { return VNtoStores; }
+};
+
+// Records all call instructions candidate for code hoisting.
+class CallInfo {
+  VNtoInsns VNtoCallsScalars;
+  VNtoInsns VNtoCallsLoads;
+  VNtoInsns VNtoCallsStores;
+
+public:
+  // Insert Call and its value numbering in one of the VNtoCalls* containers.
+  void insert(CallInst *Call, GVN::ValueTable &VN) {
+    // A call that doesNotAccessMemory is handled as a Scalar,
+    // onlyReadsMemory will be handled as a Load instruction,
+    // all other calls will be handled as stores.
+    unsigned V = VN.lookupOrAdd(Call);
+
+    if (Call->doesNotAccessMemory())
+      VNtoCallsScalars[V].push_back(Call);
+    else if (Call->onlyReadsMemory())
+      VNtoCallsLoads[V].push_back(Call);
+    else
+      VNtoCallsStores[V].push_back(Call);
+  }
+
+  const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; }
+
+  const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; }
+
+  const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; }
+};
+
+typedef DenseMap<const BasicBlock *, bool> BBSideEffectsSet;
+typedef SmallVector<Instruction *, 4> SmallVecInsn;
+typedef SmallVectorImpl<Instruction *> SmallVecImplInsn;
+
+// This pass hoists common computations across branches sharing common
+// dominator. The primary goal is to reduce the code size, and in some
+// cases reduce critical path (by exposing more ILP).
+class GVNHoist {
+public:
+  GVN::ValueTable VN;
+  DominatorTree *DT;
+  AliasAnalysis *AA;
+  MemoryDependenceResults *MD;
+  const bool OptForMinSize;
+  DenseMap<const BasicBlock *, unsigned> DFSNumber;
+  BBSideEffectsSet BBSideEffects;
+  MemorySSA *MSSA;
+  enum InsKind { Unknown, Scalar, Load, Store };
+
+  GVNHoist(DominatorTree *Dt, AliasAnalysis *Aa, MemoryDependenceResults *Md,
+           bool OptForMinSize)
+      : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize) {}
+
+  // Return true when there are exception handling in BB.
+  bool hasEH(const BasicBlock *BB) {
+    auto It = BBSideEffects.find(BB);
+    if (It != BBSideEffects.end())
+      return It->second;
+
+    if (BB->isEHPad() || BB->hasAddressTaken()) {
+      BBSideEffects[BB] = true;
+      return true;
+    }
+
+    if (BB->getTerminator()->mayThrow()) {
+      BBSideEffects[BB] = true;
+      return true;
+    }
+
+    BBSideEffects[BB] = false;
+    return false;
+  }
+
+  // Return true when all paths from A to the end of the function pass through
+  // either B or C.
+  bool hoistingFromAllPaths(const BasicBlock *A, const BasicBlock *B,
+                            const BasicBlock *C) {
+    // We fully copy the WL in order to be able to remove items from it.
+    SmallPtrSet<const BasicBlock *, 2> WL;
+    WL.insert(B);
+    WL.insert(C);
+
+    for (auto It = df_begin(A), E = df_end(A); It != E;) {
+      // There exists a path from A to the exit of the function if we are still
+      // iterating in DF traversal and we removed all instructions from the work
+      // list.
+      if (WL.empty())
+        return false;
+
+      const BasicBlock *BB = *It;
+      if (WL.erase(BB)) {
+        // Stop DFS traversal when BB is in the work list.
+        It.skipChildren();
+        continue;
+      }
+
+      // Check for end of function, calls that do not return, etc.
+      if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator()))
+        return false;
+
+      // Increment DFS traversal when not skipping children.
+      ++It;
+    }
+
+    return true;
+  }
+
+  /* Return true when I1 appears before I2 in the instructions of BB.  */
+  bool firstInBB(BasicBlock *BB, const Instruction *I1, const Instruction *I2) {
+    for (Instruction &I : *BB) {
+      if (&I == I1)
+        return true;
+      if (&I == I2)
+        return false;
+    }
+
+    llvm_unreachable("I1 and I2 not found in BB");
+  }
+  // Return true when there are users of Def in BB.
+  bool hasMemoryUseOnPath(MemoryAccess *Def, const BasicBlock *BB,
+                          const Instruction *OldPt) {
+    Value::user_iterator UI = Def->user_begin();
+    Value::user_iterator UE = Def->user_end();
+    const BasicBlock *DefBB = Def->getBlock();
+    const BasicBlock *OldBB = OldPt->getParent();
+
+    for (; UI != UE; ++UI)
+      if (MemoryUse *U = dyn_cast<MemoryUse>(*UI)) {
+        BasicBlock *UBB = U->getBlock();
+        // Only analyze uses in BB.
+        if (BB != UBB)
+          continue;
+
+        // A use in the same block as the Def is on the path.
+        if (UBB == DefBB) {
+          assert(MSSA->locallyDominates(Def, U) && "def not dominating use");
+          return true;
+        }
+
+        if (UBB != OldBB)
+          return true;
+
+        // It is only harmful to hoist when the use is before OldPt.
+        if (firstInBB(UBB, U->getMemoryInst(), OldPt))
+          return true;
+      }
+
+    return false;
+  }
+
+  // Return true when there are exception handling or loads of memory Def
+  // between OldPt and NewPt.
+
+  // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
+  // return true when the counter NBBsOnAllPaths reaces 0, except when it is
+  // initialized to -1 which is unlimited.
+  bool hasEHOrLoadsOnPath(const Instruction *NewPt, const Instruction *OldPt,
+                          MemoryAccess *Def, int &NBBsOnAllPaths) {
+    const BasicBlock *NewBB = NewPt->getParent();
+    const BasicBlock *OldBB = OldPt->getParent();
+    assert(DT->dominates(NewBB, OldBB) && "invalid path");
+    assert(DT->dominates(Def->getBlock(), NewBB) &&
+           "def does not dominate new hoisting point");
+
+    // Walk all basic blocks reachable in depth-first iteration on the inverse
+    // CFG from OldBB to NewBB. These blocks are all the blocks that may be
+    // executed between the execution of NewBB and OldBB. Hoisting an expression
+    // from OldBB into NewBB has to be safe on all execution paths.
+    for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
+      if (*I == NewBB) {
+        // Stop traversal when reaching HoistPt.
+        I.skipChildren();
+        continue;
+      }
+
+      // Impossible to hoist with exceptions on the path.
+      if (hasEH(*I))
+        return true;
+
+      // Check that we do not move a store past loads.
+      if (hasMemoryUseOnPath(Def, *I, OldPt))
+        return true;
+
+      // Stop walk once the limit is reached.
+      if (NBBsOnAllPaths == 0)
+        return true;
+
+      // -1 is unlimited number of blocks on all paths.
+      if (NBBsOnAllPaths != -1)
+        --NBBsOnAllPaths;
+
+      ++I;
+    }
+
+    return false;
+  }
+
+  // Return true when there are exception handling between HoistPt and BB.
+  // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
+  // return true when the counter NBBsOnAllPaths reaches 0, except when it is
+  // initialized to -1 which is unlimited.
+  bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *BB,
+                   int &NBBsOnAllPaths) {
+    assert(DT->dominates(HoistPt, BB) && "Invalid path");
+
+    // Walk all basic blocks reachable in depth-first iteration on
+    // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
+    // blocks that may be executed between the execution of NewHoistPt and
+    // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
+    // on all execution paths.
+    for (auto I = idf_begin(BB), E = idf_end(BB); I != E;) {
+      if (*I == HoistPt) {
+        // Stop traversal when reaching NewHoistPt.
+        I.skipChildren();
+        continue;
+      }
+
+      // Impossible to hoist with exceptions on the path.
+      if (hasEH(*I))
+        return true;
+
+      // Stop walk once the limit is reached.
+      if (NBBsOnAllPaths == 0)
+        return true;
+
+      // -1 is unlimited number of blocks on all paths.
+      if (NBBsOnAllPaths != -1)
+        --NBBsOnAllPaths;
+
+      ++I;
+    }
+
+    return false;
+  }
+
+  // Return true when it is safe to hoist a memory load or store U from OldPt
+  // to NewPt.
+  bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt,
+                       MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) {
+
+    // In place hoisting is safe.
+    if (NewPt == OldPt)
+      return true;
+
+    const BasicBlock *NewBB = NewPt->getParent();
+    const BasicBlock *OldBB = OldPt->getParent();
+    const BasicBlock *UBB = U->getBlock();
+
+    // Check for dependences on the Memory SSA.
+    MemoryAccess *D = U->getDefiningAccess();
+    BasicBlock *DBB = D->getBlock();
+    if (DT->properlyDominates(NewBB, DBB))
+      // Cannot move the load or store to NewBB above its definition in DBB.
+      return false;
+
+    if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
+      if (MemoryUseOrDef *UD = dyn_cast<MemoryUseOrDef>(D))
+        if (firstInBB(DBB, NewPt, UD->getMemoryInst()))
+          // Cannot move the load or store to NewPt above its definition in D.
+          return false;
+
+    // Check for unsafe hoistings due to side effects.
+    if (K == InsKind::Store) {
+      if (hasEHOrLoadsOnPath(NewPt, OldPt, D, NBBsOnAllPaths))
+        return false;
+    } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
+      return false;
+
+    if (UBB == NewBB) {
+      if (DT->properlyDominates(DBB, NewBB))
+        return true;
+      assert(UBB == DBB);
+      assert(MSSA->locallyDominates(D, U));
+    }
+
+    // No side effects: it is safe to hoist.
+    return true;
+  }
+
+  // Return true when it is safe to hoist scalar instructions from BB1 and BB2
+  // to HoistBB.
+  bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB1,
+                         const BasicBlock *BB2, int &NBBsOnAllPaths) {
+    // Check that the hoisted expression is needed on all paths.  When HoistBB
+    // already contains an instruction to be hoisted, the expression is needed
+    // on all paths.  Enable scalar hoisting at -Oz as it is safe to hoist
+    // scalars to a place where they are partially needed.
+    if (!OptForMinSize && BB1 != HoistBB &&
+        !hoistingFromAllPaths(HoistBB, BB1, BB2))
+      return false;
+
+    if (hasEHOnPath(HoistBB, BB1, NBBsOnAllPaths) ||
+        hasEHOnPath(HoistBB, BB2, NBBsOnAllPaths))
+      return false;
+
+    // Safe to hoist scalars from BB1 and BB2 to HoistBB.
+    return true;
+  }
+
+  // Each element of a hoisting list contains the basic block where to hoist and
+  // a list of instructions to be hoisted.
+  typedef std::pair<BasicBlock *, SmallVecInsn> HoistingPointInfo;
+  typedef SmallVector<HoistingPointInfo, 4> HoistingPointList;
+
+  // Partition InstructionsToHoist into a set of candidates which can share a
+  // common hoisting point. The partitions are collected in HPL. IsScalar is
+  // true when the instructions in InstructionsToHoist are scalars. IsLoad is
+  // true when the InstructionsToHoist are loads, false when they are stores.
+  void partitionCandidates(SmallVecImplInsn &InstructionsToHoist,
+                           HoistingPointList &HPL, InsKind K) {
+    // No need to sort for two instructions.
+    if (InstructionsToHoist.size() > 2) {
+      SortByDFSIn Pred(DFSNumber);
+      std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred);
+    }
+
+    int NBBsOnAllPaths = MaxNumberOfBBSInPath;
+
+    SmallVecImplInsn::iterator II = InstructionsToHoist.begin();
+    SmallVecImplInsn::iterator Start = II;
+    Instruction *HoistPt = *II;
+    BasicBlock *HoistBB = HoistPt->getParent();
+    MemoryUseOrDef *UD;
+    if (K != InsKind::Scalar)
+      UD = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(HoistPt));
+
+    for (++II; II != InstructionsToHoist.end(); ++II) {
+      Instruction *Insn = *II;
+      BasicBlock *BB = Insn->getParent();
+      BasicBlock *NewHoistBB;
+      Instruction *NewHoistPt;
+
+      if (BB == HoistBB) {
+        NewHoistBB = HoistBB;
+        NewHoistPt = firstInBB(BB, Insn, HoistPt) ? Insn : HoistPt;
+      } else {
+        NewHoistBB = DT->findNearestCommonDominator(HoistBB, BB);
+        if (NewHoistBB == BB)
+          NewHoistPt = Insn;
+        else if (NewHoistBB == HoistBB)
+          NewHoistPt = HoistPt;
+        else
+          NewHoistPt = NewHoistBB->getTerminator();
+      }
+
+      if (K == InsKind::Scalar) {
+        if (safeToHoistScalar(NewHoistBB, HoistBB, BB, NBBsOnAllPaths)) {
+          // Extend HoistPt to NewHoistPt.
+          HoistPt = NewHoistPt;
+          HoistBB = NewHoistBB;
+          continue;
+        }
+      } else {
+        // When NewBB already contains an instruction to be hoisted, the
+        // expression is needed on all paths.
+        // Check that the hoisted expression is needed on all paths: it is
+        // unsafe to hoist loads to a place where there may be a path not
+        // loading from the same address: for instance there may be a branch on
+        // which the address of the load may not be initialized.
+        if ((HoistBB == NewHoistBB || BB == NewHoistBB ||
+             hoistingFromAllPaths(NewHoistBB, HoistBB, BB)) &&
+            // Also check that it is safe to move the load or store from HoistPt
+            // to NewHoistPt, and from Insn to NewHoistPt.
+            safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NBBsOnAllPaths) &&
+            safeToHoistLdSt(NewHoistPt, Insn,
+                            cast<MemoryUseOrDef>(MSSA->getMemoryAccess(Insn)),
+                            K, NBBsOnAllPaths)) {
+          // Extend HoistPt to NewHoistPt.
+          HoistPt = NewHoistPt;
+          HoistBB = NewHoistBB;
+          continue;
+        }
+      }
+
+      // At this point it is not safe to extend the current hoisting to
+      // NewHoistPt: save the hoisting list so far.
+      if (std::distance(Start, II) > 1)
+        HPL.push_back(std::make_pair(HoistBB, SmallVecInsn(Start, II)));
+
+      // Start over from BB.
+      Start = II;
+      if (K != InsKind::Scalar)
+        UD = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(*Start));
+      HoistPt = Insn;
+      HoistBB = BB;
+      NBBsOnAllPaths = MaxNumberOfBBSInPath;
+    }
+
+    // Save the last partition.
+    if (std::distance(Start, II) > 1)
+      HPL.push_back(std::make_pair(HoistBB, SmallVecInsn(Start, II)));
+  }
+
+  // Initialize HPL from Map.
+  void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL,
+                              InsKind K) {
+    for (VNtoInsns::const_iterator It = Map.begin(); It != Map.end(); ++It) {
+      if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold)
+        return;
+
+      const SmallVecInsn &V = It->second;
+      if (V.size() < 2)
+        continue;
+
+      // Compute the insertion point and the list of expressions to be hoisted.
+      SmallVecInsn InstructionsToHoist;
+      for (auto I : V)
+        if (!hasEH(I->getParent()))
+          InstructionsToHoist.push_back(I);
+
+      if (InstructionsToHoist.size())
+        partitionCandidates(InstructionsToHoist, HPL, K);
+    }
+  }
+
+  // Return true when all operands of Instr are available at insertion point
+  // HoistPt. When limiting the number of hoisted expressions, one could hoist
+  // a load without hoisting its access function. So before hoisting any
+  // expression, make sure that all its operands are available at insert point.
+  bool allOperandsAvailable(const Instruction *I,
+                            const BasicBlock *HoistPt) const {
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      const Value *Op = I->getOperand(i);
+      const Instruction *Inst = dyn_cast<Instruction>(Op);
+      if (Inst && !DT->dominates(Inst->getParent(), HoistPt))
+        return false;
+    }
+
+    return true;
+  }
+
+  Instruction *firstOfTwo(Instruction *I, Instruction *J) const {
+    for (Instruction &I1 : *I->getParent())
+      if (&I1 == I || &I1 == J)
+        return &I1;
+    llvm_unreachable("Both I and J must be from same BB");
+  }
+
+  // Replace the use of From with To in Insn.
+  void replaceUseWith(Instruction *Insn, Value *From, Value *To) const {
+    for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+         UI != UE;) {
+      Use &U = *UI++;
+      if (U.getUser() == Insn) {
+        U.set(To);
+        return;
+      }
+    }
+    llvm_unreachable("should replace exactly once");
+  }
+
+  bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt) const {
+    // Check whether the GEP of a ld/st can be synthesized at HoistPt.
+    Instruction *Gep = nullptr;
+    Instruction *Val = nullptr;
+    if (LoadInst *Ld = dyn_cast<LoadInst>(Repl))
+      Gep = dyn_cast<Instruction>(Ld->getPointerOperand());
+    if (StoreInst *St = dyn_cast<StoreInst>(Repl)) {
+      Gep = dyn_cast<Instruction>(St->getPointerOperand());
+      Val = dyn_cast<Instruction>(St->getValueOperand());
+    }
+
+    if (!Gep || !isa<GetElementPtrInst>(Gep))
+      return false;
+
+    // Check whether we can compute the Gep at HoistPt.
+    if (!allOperandsAvailable(Gep, HoistPt))
+      return false;
+
+    // Also check that the stored value is available.
+    if (Val && !allOperandsAvailable(Val, HoistPt))
+      return false;
+
+    // Copy the gep before moving the ld/st.
+    Instruction *ClonedGep = Gep->clone();
+    ClonedGep->insertBefore(HoistPt->getTerminator());
+    replaceUseWith(Repl, Gep, ClonedGep);
+
+    // Also copy Val when it is a gep: geps are not hoisted by default.
+    if (Val && isa<GetElementPtrInst>(Val)) {
+      Instruction *ClonedVal = Val->clone();
+      ClonedVal->insertBefore(HoistPt->getTerminator());
+      replaceUseWith(Repl, Val, ClonedVal);
+    }
+
+    return true;
+  }
+
+  std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL) {
+    unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
+    for (const HoistingPointInfo &HP : HPL) {
+      // Find out whether we already have one of the instructions in HoistPt,
+      // in which case we do not have to move it.
+      BasicBlock *HoistPt = HP.first;
+      const SmallVecInsn &InstructionsToHoist = HP.second;
+      Instruction *Repl = nullptr;
+      for (Instruction *I : InstructionsToHoist)
+        if (I->getParent() == HoistPt) {
+          // If there are two instructions in HoistPt to be hoisted in place:
+          // update Repl to be the first one, such that we can rename the uses
+          // of the second based on the first.
+          Repl = !Repl ? I : firstOfTwo(Repl, I);
+        }
+
+      if (Repl) {
+        // Repl is already in HoistPt: it remains in place.
+        assert(allOperandsAvailable(Repl, HoistPt) &&
+               "instruction depends on operands that are not available");
+      } else {
+        // When we do not find Repl in HoistPt, select the first in the list
+        // and move it to HoistPt.
+        Repl = InstructionsToHoist.front();
+
+        // We can move Repl in HoistPt only when all operands are available.
+        // The order in which hoistings are done may influence the availability
+        // of operands.
+        if (!allOperandsAvailable(Repl, HoistPt) &&
+            !makeOperandsAvailable(Repl, HoistPt))
+          continue;
+        Repl->moveBefore(HoistPt->getTerminator());
+      }
+
+      if (isa<LoadInst>(Repl))
+        ++NL;
+      else if (isa<StoreInst>(Repl))
+        ++NS;
+      else if (isa<CallInst>(Repl))
+        ++NC;
+      else // Scalar
+        ++NI;
+
+      // Remove and rename all other instructions.
+      for (Instruction *I : InstructionsToHoist)
+        if (I != Repl) {
+          ++NR;
+          if (isa<LoadInst>(Repl))
+            ++NumLoadsRemoved;
+          else if (isa<StoreInst>(Repl))
+            ++NumStoresRemoved;
+          else if (isa<CallInst>(Repl))
+            ++NumCallsRemoved;
+          I->replaceAllUsesWith(Repl);
+          I->eraseFromParent();
+        }
+    }
+
+    NumHoisted += NL + NS + NC + NI;
+    NumRemoved += NR;
+    NumLoadsHoisted += NL;
+    NumStoresHoisted += NS;
+    NumCallsHoisted += NC;
+    return {NI, NL + NC + NS};
+  }
+
+  // Hoist all expressions. Returns Number of scalars hoisted
+  // and number of non-scalars hoisted.
+  std::pair<unsigned, unsigned> hoistExpressions(Function &F) {
+    InsnInfo II;
+    LoadInfo LI;
+    StoreInfo SI;
+    CallInfo CI;
+    for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
+      for (Instruction &I1 : *BB) {
+        if (LoadInst *Load = dyn_cast<LoadInst>(&I1))
+          LI.insert(Load, VN);
+        else if (StoreInst *Store = dyn_cast<StoreInst>(&I1))
+          SI.insert(Store, VN);
+        else if (CallInst *Call = dyn_cast<CallInst>(&I1)) {
+          if (IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call)) {
+            if (isa<DbgInfoIntrinsic>(Intr) ||
+                Intr->getIntrinsicID() == Intrinsic::assume)
+              continue;
+          }
+          if (Call->mayHaveSideEffects()) {
+            if (!OptForMinSize)
+              break;
+            // We may continue hoisting across calls which write to memory.
+            if (Call->mayThrow())
+              break;
+          }
+          CI.insert(Call, VN);
+        } else if (OptForMinSize || !isa<GetElementPtrInst>(&I1))
+          // Do not hoist scalars past calls that may write to memory because
+          // that could result in spills later. geps are handled separately.
+          // TODO: We can relax this for targets like AArch64 as they have more
+          // registers than X86.
+          II.insert(&I1, VN);
+      }
+    }
+
+    HoistingPointList HPL;
+    computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
+    computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
+    computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
+    computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
+    computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
+    computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
+    return hoist(HPL);
+  }
+
+  bool run(Function &F) {
+    VN.setDomTree(DT);
+    VN.setAliasAnalysis(AA);
+    VN.setMemDep(MD);
+    bool Res = false;
+
+    unsigned I = 0;
+    for (const BasicBlock *BB : depth_first(&F.getEntryBlock()))
+      DFSNumber.insert(std::make_pair(BB, ++I));
+
+    // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
+    while (1) {
+      // FIXME: only compute MemorySSA once. We need to update the analysis in
+      // the same time as transforming the code.
+      MemorySSA M(F, AA, DT);
+      MSSA = &M;
+
+      auto HoistStat = hoistExpressions(F);
+      if (HoistStat.first + HoistStat.second == 0) {
+        return Res;
+      }
+      if (HoistStat.second > 0) {
+        // To address a limitation of the current GVN, we need to rerun the
+        // hoisting after we hoisted loads in order to be able to hoist all
+        // scalars dependent on the hoisted loads. Same for stores.
+        VN.clear();
+      }
+      Res = true;
+    }
+
+    return Res;
+  }
+};
+
+class GVNHoistLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  GVNHoistLegacyPass() : FunctionPass(ID) {
+    initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+    auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+
+    GVNHoist G(&DT, &AA, &MD, F.optForMinSize());
+    return G.run(F);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<MemoryDependenceWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
+};
+} // namespace
+
+PreservedAnalyses GVNHoistPass::run(Function &F,
+                                    AnalysisManager<Function> &AM) {
+  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  AliasAnalysis &AA = AM.getResult<AAManager>(F);
+  MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
+
+  GVNHoist G(&DT, &AA, &MD, F.optForMinSize());
+  if (!G.run(F))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserve<DominatorTreeAnalysis>();
+  return PA;
+}
+
+char GVNHoistLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",
+                      "Early GVN Hoisting of Expressions", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist",
+                    "Early GVN Hoisting of Expressions", false, false)
+
+FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); }
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index 13610fa..e57fda3 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -44,6 +44,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeGuardWideningLegacyPassPass(Registry);
   initializeGVNLegacyPassPass(Registry);
   initializeEarlyCSELegacyPassPass(Registry);
+  initializeGVNHoistLegacyPassPass(Registry);
   initializeFlattenCFGPassPass(Registry);
   initializeInductiveRangeCheckEliminationPass(Registry);
   initializeIndVarSimplifyLegacyPassPass(Registry);
@@ -236,6 +237,10 @@ void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createEarlyCSEPass());
 }
 
+void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createGVNHoistPass());
+}
+
 void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createTypeBasedAAWrapperPass());
 }
diff --git a/llvm/test/Transforms/GVN/hoist-pr20242.ll b/llvm/test/Transforms/GVN/hoist-pr20242.ll
new file mode 100644
index 0000000..b91f18a
--- /dev/null
+++ b/llvm/test/Transforms/GVN/hoist-pr20242.ll
@@ -0,0 +1,74 @@
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that all "or" expressions are hoisted.
+; CHECK-LABEL: @encode
+; CHECK: or i32
+; CHECK-NOT: or i32
+
+define i8* @encode(i8* %p, i32 %v) {
+entry:
+  %p.addr = alloca i8*, align 8
+  %v.addr = alloca i32, align 4
+  store i8* %p, i8** %p.addr, align 8
+  store i32 %v, i32* %v.addr, align 4
+  %0 = load i32, i32* %v.addr, align 4
+  %cmp = icmp ult i32 %0, 23
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %1 = load i32, i32* %v.addr, align 4
+  %or = or i32 %1, 128
+  %conv = trunc i32 %or to i8
+  %2 = load i8*, i8** %p.addr, align 8
+  %incdec.ptr = getelementptr inbounds i8, i8* %2, i32 1
+  store i8* %incdec.ptr, i8** %p.addr, align 8
+  store i8 %conv, i8* %2, align 1
+  br label %if.end15
+
+if.else:                                          ; preds = %entry
+  %3 = load i32, i32* %v.addr, align 4
+  %cmp1 = icmp ult i32 %3, 42
+  br i1 %cmp1, label %if.then3, label %if.else9
+
+if.then3:                                         ; preds = %if.else
+  %4 = load i32, i32* %v.addr, align 4
+  %or4 = or i32 %4, 128
+  %conv5 = trunc i32 %or4 to i8
+  %5 = load i8*, i8** %p.addr, align 8
+  %incdec.ptr6 = getelementptr inbounds i8, i8* %5, i32 1
+  store i8* %incdec.ptr6, i8** %p.addr, align 8
+  store i8 %conv5, i8* %5, align 1
+  %6 = load i32, i32* %v.addr, align 4
+  %conv7 = trunc i32 %6 to i8
+  %7 = load i8*, i8** %p.addr, align 8
+  %incdec.ptr8 = getelementptr inbounds i8, i8* %7, i32 1
+  store i8* %incdec.ptr8, i8** %p.addr, align 8
+  store i8 %conv7, i8* %7, align 1
+  br label %if.end
+
+if.else9:                                         ; preds = %if.else
+  %8 = load i32, i32* %v.addr, align 4
+  %or10 = or i32 %8, 128
+  %conv11 = trunc i32 %or10 to i8
+  %9 = load i8*, i8** %p.addr, align 8
+  %incdec.ptr12 = getelementptr inbounds i8, i8* %9, i32 1
+  store i8* %incdec.ptr12, i8** %p.addr, align 8
+  store i8 %conv11, i8* %9, align 1
+  %10 = load i32, i32* %v.addr, align 4
+  %shr = lshr i32 %10, 7
+  %conv13 = trunc i32 %shr to i8
+  %11 = load i8*, i8** %p.addr, align 8
+  %incdec.ptr14 = getelementptr inbounds i8, i8* %11, i32 1
+  store i8* %incdec.ptr14, i8** %p.addr, align 8
+  store i8 %conv13, i8* %11, align 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.else9, %if.then3
+  br label %if.end15
+
+if.end15:                                         ; preds = %if.end, %if.then
+  %12 = load i8*, i8** %p.addr, align 8
+  ret i8* %12
+}
diff --git a/llvm/test/Transforms/GVN/hoist-pr22005.ll b/llvm/test/Transforms/GVN/hoist-pr22005.ll
new file mode 100644
index 0000000..9299f4f
--- /dev/null
+++ b/llvm/test/Transforms/GVN/hoist-pr22005.ll
@@ -0,0 +1,30 @@
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that all "sub" expressions are hoisted.
+; CHECK-LABEL: @fun
+; CHECK: sub i64
+; CHECK-NOT: sub i64
+
+define i64 @fun(i8* %out, i8* %end) {
+  %1 = icmp ult i8* %out, %end
+  br i1 %1, label %2, label %6
+
+; <label>:2                                       ; preds = %0
+  %3 = ptrtoint i8* %end to i64
+  %4 = ptrtoint i8* %out to i64
+  %5 = sub i64 %3, %4
+  br label %10
+
+; <label>:6                                       ; preds = %0
+  %7 = ptrtoint i8* %out to i64
+  %8 = ptrtoint i8* %end to i64
+  %9 = sub i64 %8, %7
+  br label %10
+
+; <label>:10                                      ; preds = %6, %2
+  %.in = phi i64 [ %5, %2 ], [ %9, %6 ]
+  %11 = add i64 %.in, 257
+  ret i64 %11
+}
diff --git a/llvm/test/Transforms/GVN/hoist.ll b/llvm/test/Transforms/GVN/hoist.ll
new file mode 100644
index 0000000..375dd91
--- /dev/null
+++ b/llvm/test/Transforms/GVN/hoist.ll
@@ -0,0 +1,691 @@
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@GlobalVar = internal global float 1.000000e+00
+
+; Check that all scalar expressions are hoisted.
+;
+; CHECK-LABEL: @scalarsHoisting
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: fsub
+; CHECK: fmul
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @scalarsHoisting(float %d, float %min, float %max, float %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %sub = fsub float %min, %a
+  %mul = fmul float %sub, %div
+  %sub1 = fsub float %max, %a
+  %mul2 = fmul float %sub1, %div
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %sub3 = fsub float %max, %a
+  %mul4 = fmul float %sub3, %div
+  %sub5 = fsub float %min, %a
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+  %add = fadd float %tmax.0, %tmin.0
+  ret float %add
+}
+
+; Check that all loads and scalars depending on the loads are hoisted.
+; Check that getelementptr computation gets hoisted before the load.
+;
+; CHECK-LABEL: @readsAndScalarsHoisting
+; CHECK: load
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: fsub
+; CHECK: fmul
+; CHECK-NOT: load
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @readsAndScalarsHoisting(float %d, float* %min, float* %max, float* %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %A = getelementptr float, float* %min, i32 1
+  %0 = load float, float* %A, align 4
+  %1 = load float, float* %a, align 4
+  %sub = fsub float %0, %1
+  %mul = fmul float %sub, %div
+  %2 = load float, float* %max, align 4
+  %sub1 = fsub float %2, %1
+  %mul2 = fmul float %sub1, %div
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %3 = load float, float* %max, align 4
+  %4 = load float, float* %a, align 4
+  %sub3 = fsub float %3, %4
+  %mul4 = fmul float %sub3, %div
+  %B = getelementptr float, float* %min, i32 1
+  %5 = load float, float* %B, align 4
+  %sub5 = fsub float %5, %4
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+  %add = fadd float %tmax.0, %tmin.0
+  ret float %add
+}
+
+; Check that we do not hoist loads after a store: the first two loads will be
+; hoisted, and then the third load will not be hoisted.
+;
+; CHECK-LABEL: @readsAndWrites
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: store
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK-NOT: load
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @readsAndWrites(float %d, float* %min, float* %max, float* %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %0 = load float, float* %min, align 4
+  %1 = load float, float* %a, align 4
+  store float %0, float* @GlobalVar
+  %sub = fsub float %0, %1
+  %mul = fmul float %sub, %div
+  %2 = load float, float* %max, align 4
+  %sub1 = fsub float %2, %1
+  %mul2 = fmul float %sub1, %div
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %3 = load float, float* %max, align 4
+  %4 = load float, float* %a, align 4
+  %sub3 = fsub float %3, %4
+  %mul4 = fmul float %sub3, %div
+  %5 = load float, float* %min, align 4
+  %sub5 = fsub float %5, %4
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+  %add = fadd float %tmax.0, %tmin.0
+  ret float %add
+}
+
+; Check that we do hoist loads when the store is above the insertion point.
+;
+; CHECK-LABEL: @readsAndWriteAboveInsertPt
+; CHECK: load
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: fmul
+; CHECK-NOT: load
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @readsAndWriteAboveInsertPt(float %d, float* %min, float* %max, float* %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  store float 0.000000e+00, float* @GlobalVar
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %0 = load float, float* %min, align 4
+  %1 = load float, float* %a, align 4
+  %sub = fsub float %0, %1
+  %mul = fmul float %sub, %div
+  %2 = load float, float* %max, align 4
+  %sub1 = fsub float %2, %1
+  %mul2 = fmul float %sub1, %div
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %3 = load float, float* %max, align 4
+  %4 = load float, float* %a, align 4
+  %sub3 = fsub float %3, %4
+  %mul4 = fmul float %sub3, %div
+  %5 = load float, float* %min, align 4
+  %sub5 = fsub float %5, %4
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+  %add = fadd float %tmax.0, %tmin.0
+  ret float %add
+}
+
+; Check that dependent expressions are hoisted.
+; CHECK-LABEL: @dependentScalarsHoisting
+; CHECK: fsub
+; CHECK: fadd
+; CHECK: fdiv
+; CHECK: fmul
+; CHECK-NOT: fsub
+; CHECK-NOT: fadd
+; CHECK-NOT: fdiv
+; CHECK-NOT: fmul
+define float @dependentScalarsHoisting(float %a, float %b, i1 %c) {
+entry:
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %d = fsub float %b, %a
+  %e = fadd float %d, %a
+  %f = fdiv float %e, %a
+  %g = fmul float %f, %a
+  br label %if.end
+
+if.else:
+  %h = fsub float %b, %a
+  %i = fadd float %h, %a
+  %j = fdiv float %i, %a
+  %k = fmul float %j, %a
+  br label %if.end
+
+if.end:
+  %r = phi float [ %g, %if.then ], [ %k, %if.else ]
+  ret float %r
+}
+
+; Check that all independent expressions are hoisted.
+; CHECK-LABEL: @independentScalarsHoisting
+; CHECK: fmul
+; CHECK: fadd
+; CHECK: fdiv
+; CHECK: fsub
+; CHECK-NOT: fsub
+; CHECK-NOT: fdiv
+; CHECK-NOT: fmul
+define float @independentScalarsHoisting(float %a, float %b, i1 %c) {
+entry:
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %d = fadd float %b, %a
+  %e = fsub float %b, %a
+  %f = fdiv float %b, %a
+  %g = fmul float %b, %a
+  br label %if.end
+
+if.else:
+  %i = fadd float %b, %a
+  %h = fsub float %b, %a
+  %j = fdiv float %b, %a
+  %k = fmul float %b, %a
+  br label %if.end
+
+if.end:
+  %p = phi float [ %d, %if.then ], [ %i, %if.else ]
+  %q = phi float [ %e, %if.then ], [ %h, %if.else ]
+  %r = phi float [ %f, %if.then ], [ %j, %if.else ]
+  %s = phi float [ %g, %if.then ], [ %k, %if.else ]
+  %t = fadd float %p, %q
+  %u = fadd float %r, %s
+  %v = fadd float %t, %u
+  ret float %v
+}
+
+; Check that we hoist load and scalar expressions in triangles.
+; CHECK-LABEL: @triangleHoisting
+; CHECK: load
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: fmul
+; CHECK-NOT: load
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @triangleHoisting(float %d, float* %min, float* %max, float* %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %0 = load float, float* %min, align 4
+  %1 = load float, float* %a, align 4
+  %sub = fsub float %0, %1
+  %mul = fmul float %sub, %div
+  %2 = load float, float* %max, align 4
+  %sub1 = fsub float %2, %1
+  %mul2 = fmul float %sub1, %div
+  br label %if.end
+
+if.end:                                          ; preds = %entry
+  %p1 = phi float [ %mul2, %if.then ], [ 0.000000e+00, %entry ]
+  %p2 = phi float [ %mul, %if.then ], [ 0.000000e+00, %entry ]
+  %3 = load float, float* %max, align 4
+  %4 = load float, float* %a, align 4
+  %sub3 = fsub float %3, %4
+  %mul4 = fmul float %sub3, %div
+  %5 = load float, float* %min, align 4
+  %sub5 = fsub float %5, %4
+  %mul6 = fmul float %sub5, %div
+
+  %x = fadd float %p1, %mul6
+  %y = fadd float %p2, %mul4
+  %z = fadd float %x, %y
+  ret float %z
+}
+
+; Check that we hoist load and scalar expressions in dominator.
+; CHECK-LABEL: @dominatorHoisting
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK-NOT: load
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @dominatorHoisting(float %d, float* %min, float* %max, float* %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %0 = load float, float* %min, align 4
+  %1 = load float, float* %a, align 4
+  %sub = fsub float %0, %1
+  %mul = fmul float %sub, %div
+  %2 = load float, float* %max, align 4
+  %sub1 = fsub float %2, %1
+  %mul2 = fmul float %sub1, %div
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %3 = load float, float* %max, align 4
+  %4 = load float, float* %a, align 4
+  %sub3 = fsub float %3, %4
+  %mul4 = fmul float %sub3, %div
+  %5 = load float, float* %min, align 4
+  %sub5 = fsub float %5, %4
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:                                          ; preds = %entry
+  %p1 = phi float [ %mul4, %if.then ], [ 0.000000e+00, %entry ]
+  %p2 = phi float [ %mul6, %if.then ], [ 0.000000e+00, %entry ]
+
+  %x = fadd float %p1, %mul2
+  %y = fadd float %p2, %mul
+  %z = fadd float %x, %y
+  ret float %z
+}
+
+; Check that we hoist load and scalar expressions in dominator.
+; CHECK-LABEL: @domHoisting
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK-NOT: load
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @domHoisting(float %d, float* %min, float* %max, float* %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %0 = load float, float* %min, align 4
+  %1 = load float, float* %a, align 4
+  %sub = fsub float %0, %1
+  %mul = fmul float %sub, %div
+  %2 = load float, float* %max, align 4
+  %sub1 = fsub float %2, %1
+  %mul2 = fmul float %sub1, %div
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %3 = load float, float* %max, align 4
+  %4 = load float, float* %a, align 4
+  %sub3 = fsub float %3, %4
+  %mul4 = fmul float %sub3, %div
+  %5 = load float, float* %min, align 4
+  %sub5 = fsub float %5, %4
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.else:
+  %6 = load float, float* %max, align 4
+  %7 = load float, float* %a, align 4
+  %sub9 = fsub float %6, %7
+  %mul10 = fmul float %sub9, %div
+  %8 = load float, float* %min, align 4
+  %sub12 = fsub float %8, %7
+  %mul13 = fmul float %sub12, %div
+  br label %if.end
+
+if.end:
+  %p1 = phi float [ %mul4, %if.then ], [ %mul10, %if.else ]
+  %p2 = phi float [ %mul6, %if.then ], [ %mul13, %if.else ]
+
+  %x = fadd float %p1, %mul2
+  %y = fadd float %p2, %mul
+  %z = fadd float %x, %y
+  ret float %z
+}
+
+; Check that we do not hoist loads past stores within a same basic block.
+; CHECK-LABEL: @noHoistInSingleBBWithStore
+; CHECK: load
+; CHECK: store
+; CHECK: load
+; CHECK: store
+define i32 @noHoistInSingleBBWithStore() {
+entry:
+  %D = alloca i32, align 4
+  %0 = bitcast i32* %D to i8*
+  %bf = load i8, i8* %0, align 4
+  %bf.clear = and i8 %bf, -3
+  store i8 %bf.clear, i8* %0, align 4
+  %bf1 = load i8, i8* %0, align 4
+  %bf.clear1 = and i8 %bf1, 1
+  store i8 %bf.clear1, i8* %0, align 4
+  ret i32 0
+}
+
+; Check that we do not hoist loads past calls within a same basic block.
+; CHECK-LABEL: @noHoistInSingleBBWithCall
+; CHECK: load
+; CHECK: call
+; CHECK: load
+declare void @foo()
+define i32 @noHoistInSingleBBWithCall() {
+entry:
+  %D = alloca i32, align 4
+  %0 = bitcast i32* %D to i8*
+  %bf = load i8, i8* %0, align 4
+  %bf.clear = and i8 %bf, -3
+  call void @foo()
+  %bf1 = load i8, i8* %0, align 4
+  %bf.clear1 = and i8 %bf1, 1
+  ret i32 0
+}
+
+; Check that we do not hoist loads past stores in any branch of a diamond.
+; CHECK-LABEL: @noHoistInDiamondWithOneStore1
+; CHECK: fdiv
+; CHECK: fcmp
+; CHECK: br
+define float @noHoistInDiamondWithOneStore1(float %d, float* %min, float* %max, float* %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store float 0.000000e+00, float* @GlobalVar
+  %0 = load float, float* %min, align 4
+  %1 = load float, float* %a, align 4
+  %sub = fsub float %0, %1
+  %mul = fmul float %sub, %div
+  %2 = load float, float* %max, align 4
+  %sub1 = fsub float %2, %1
+  %mul2 = fmul float %sub1, %div
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  ; There are no side effects on the if.else branch.
+  %3 = load float, float* %max, align 4
+  %4 = load float, float* %a, align 4
+  %sub3 = fsub float %3, %4
+  %mul4 = fmul float %sub3, %div
+  %5 = load float, float* %min, align 4
+  %sub5 = fsub float %5, %4
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+
+  %6 = load float, float* %max, align 4
+  %7 = load float, float* %a, align 4
+  %sub6 = fsub float %6, %7
+  %mul7 = fmul float %sub6, %div
+  %8 = load float, float* %min, align 4
+  %sub8 = fsub float %8, %7
+  %mul9 = fmul float %sub8, %div
+
+  %add = fadd float %tmax.0, %tmin.0
+  ret float %add
+}
+
+; Check that we do not hoist loads past stores from half diamond.
+; CHECK-LABEL: @noHoistInHalfDiamondPastStore
+; CHECK: load
+; CHECK-NEXT: load
+; CHECK-NEXT: store
+; CHECK-NEXT: br
+; CHECK: load
+; CHECK: load
+; CHECK: load
+; CHECK: br
+define float @noHoistInHalfDiamondPastStore(float %d, float* %min, float* %max, float* %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %cmp = fcmp oge float %div, 0.000000e+00
+  %0 = load float, float* %min, align 4
+  %1 = load float, float* %a, align 4
+
+  ; Loads should not be hoisted above this store.
+  store float 0.000000e+00, float* @GlobalVar
+
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  ; There are no side effects on the if.then branch.
+  %2 = load float, float* %max, align 4
+  %3 = load float, float* %a, align 4
+  %sub3 = fsub float %2, %3
+  %mul4 = fmul float %sub3, %div
+  %4 = load float, float* %min, align 4
+  %sub5 = fsub float %4, %3
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:
+  %tmax.0 = phi float [ %mul4, %if.then ], [ %0, %entry ]
+  %tmin.0 = phi float [ %mul6, %if.then ], [ %1, %entry ]
+
+  %add = fadd float %tmax.0, %tmin.0
+  ret float %add
+}
+
+; Check that we do not hoist loads past a store in any branch of a diamond.
+; CHECK-LABEL: @noHoistInDiamondWithOneStore2
+; CHECK: fdiv
+; CHECK: fcmp
+; CHECK: br
+define float @noHoistInDiamondWithOneStore2(float %d, float* %min, float* %max, float* %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  ; There are no side effects on the if.then branch.
+  %0 = load float, float* %min, align 4
+  %1 = load float, float* %a, align 4
+  %sub = fsub float %0, %1
+  %mul = fmul float %sub, %div
+  %2 = load float, float* %max, align 4
+  %sub1 = fsub float %2, %1
+  %mul2 = fmul float %sub1, %div
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  store float 0.000000e+00, float* @GlobalVar
+  %3 = load float, float* %max, align 4
+  %4 = load float, float* %a, align 4
+  %sub3 = fsub float %3, %4
+  %mul4 = fmul float %sub3, %div
+  %5 = load float, float* %min, align 4
+  %sub5 = fsub float %5, %4
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+
+  %6 = load float, float* %max, align 4
+  %7 = load float, float* %a, align 4
+  %sub6 = fsub float %6, %7
+  %mul7 = fmul float %sub6, %div
+  %8 = load float, float* %min, align 4
+  %sub8 = fsub float %8, %7
+  %mul9 = fmul float %sub8, %div
+
+  %add = fadd float %tmax.0, %tmin.0
+  ret float %add
+}
+
+; Check that we do not hoist loads outside a loop containing stores.
+; CHECK-LABEL: @noHoistInLoopsWithStores
+; CHECK: fdiv
+; CHECK: fcmp
+; CHECK: br
+define float @noHoistInLoopsWithStores(float %d, float* %min, float* %max, float* %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %do.body, label %if.else
+
+do.body:
+  %0 = load float, float* %min, align 4
+  %1 = load float, float* %a, align 4
+
+  ; It is unsafe to hoist the loads outside the loop because of the store.
+  store float 0.000000e+00, float* @GlobalVar
+
+  %sub = fsub float %0, %1
+  %mul = fmul float %sub, %div
+  %2 = load float, float* %max, align 4
+  %sub1 = fsub float %2, %1
+  %mul2 = fmul float %sub1, %div
+  br label %while.cond
+
+while.cond:
+  %cmp1 = fcmp oge float %mul2, 0.000000e+00
+  br i1 %cmp1, label %if.end, label %do.body
+
+if.else:
+  %3 = load float, float* %max, align 4
+  %4 = load float, float* %a, align 4
+  %sub3 = fsub float %3, %4
+  %mul4 = fmul float %sub3, %div
+  %5 = load float, float* %min, align 4
+  %sub5 = fsub float %5, %4
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:
+  %tmax.0 = phi float [ %mul2, %while.cond ], [ %mul6, %if.else ]
+  %tmin.0 = phi float [ %mul, %while.cond ], [ %mul4, %if.else ]
+
+  %add = fadd float %tmax.0, %tmin.0
+  ret float %add
+}
+
+; Check that we hoist stores: all the instructions from the then branch
+; should be hoisted.
+; CHECK-LABEL: @hoistStores
+; CHECK: zext
+; CHECK: trunc
+; CHECK: getelementptr
+; CHECK: load
+; CHECK: getelementptr
+; CHECK: store
+; CHECK: load
+; CHECK: load
+; CHECK: zext
+; CHECK: add
+; CHECK: store
+; CHECK: br
+; CHECK: if.then
+; CHECK: br
+
+%struct.foo = type { i16* }
+
+define void @hoistStores(%struct.foo* %s, i32* %coord, i1 zeroext %delta) {
+entry:
+  %frombool = zext i1 %delta to i8
+  %tobool = trunc i8 %frombool to i1
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %p = getelementptr inbounds %struct.foo, %struct.foo* %s, i32 0, i32 0
+  %0 = load i16*, i16** %p, align 8
+  %incdec.ptr = getelementptr inbounds i16, i16* %0, i32 1
+  store i16* %incdec.ptr, i16** %p, align 8
+  %1 = load i16, i16* %0, align 2
+  %conv = zext i16 %1 to i32
+  %2 = load i32, i32* %coord, align 4
+  %add = add i32 %2, %conv
+  store i32 %add, i32* %coord, align 4
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %p1 = getelementptr inbounds %struct.foo, %struct.foo* %s, i32 0, i32 0
+  %3 = load i16*, i16** %p1, align 8
+  %incdec.ptr2 = getelementptr inbounds i16, i16* %3, i32 1
+  store i16* %incdec.ptr2, i16** %p1, align 8
+  %4 = load i16, i16* %3, align 2
+  %conv3 = zext i16 %4 to i32
+  %5 = load i32, i32* %coord, align 4
+  %add4 = add i32 %5, %conv3
+  store i32 %add4, i32* %coord, align 4
+  %6 = load i16*, i16** %p1, align 8
+  %incdec.ptr6 = getelementptr inbounds i16, i16* %6, i32 1
+  store i16* %incdec.ptr6, i16** %p1, align 8
+  %7 = load i16, i16* %6, align 2
+  %conv7 = zext i16 %7 to i32
+  %shl = shl i32 %conv7, 8
+  %8 = load i32, i32* %coord, align 4
+  %add8 = add i32 %8, %shl
+  store i32 %add8, i32* %coord, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
-- 
2.7.4