From: David Green Date: Sun, 21 Jun 2020 10:28:31 +0000 (+0100) Subject: [CGP] Convert phi types X-Git-Tag: llvmorg-12-init~2394 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=730ecb63ec0bac2ce83157d37bce4d2f7d4d1244;p=platform%2Fupstream%2Fllvm.git [CGP] Convert phi types If a collection of interconnected phi nodes is only ever loaded, stored or bitcast then we can convert the whole set to the bitcast type, potentially helping to reduce the number of register moves needed as the phi's are passed across basic block boundaries. This has to be done in CodegenPrepare as it naturally straddles basic blocks. The algorithm just looks from phi nodes, looking at uses and operands for a collection of nodes that all together are bitcast between float and integer types. We record visited phi nodes to not have to process them more than once. The whole subgraph is then replaced with a new type. Loads and Stores are bitcast to the correct type, which should then be folded into the load/store, changing its type. This comes up in the biquad testcase due to the way MVE needs to keep values in integer registers. I have also seen it come up from aarch64 partner example code, where a complicated set of sroa/inlining produced integer phis, where float would have been a better choice. I also added undef and extract element handling which increased the potency in some cases. This adds it with an option that defaults to off, and disabled for 32bit X86 due to potential issues around canonicalizing NaNs. 
Differential Revision: https://reviews.llvm.org/D81827 --- diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index f180b27..6238184 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2368,6 +2368,14 @@ public: return nullptr; } + /// Given a set in interconnected phis of type 'From' that are loaded/stored + /// or bitcast to type 'To', return true if the set should be converted to + /// 'To'. + virtual bool shouldConvertPhiType(Type *From, Type *To) const { + return (From->isIntegerTy() || From->isFloatingPointTy()) && + (To->isIntegerTy() || To->isFloatingPointTy()); + } + /// Returns true if the opcode is a commutative binary operation. virtual bool isCommutativeBinOp(unsigned Opcode) const { // FIXME: This should get its info from the td file. diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index f01a1fe..71a846e 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -245,6 +245,10 @@ static cl::opt cl::desc("Enable BFI update verification for " "CodeGenPrepare.")); +static cl::opt OptimizePhiTypes( + "cgp-optimize-phi-types", cl::Hidden, cl::init(false), + cl::desc("Enable converting phi types in CodeGenPrepare")); + namespace { enum ExtType { @@ -407,6 +411,9 @@ class TypePromotionTransaction; unsigned CreatedInstsCost = 0); bool mergeSExts(Function &F); bool splitLargeGEPOffsets(); + bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl &Visited, + SmallPtrSetImpl &DeletedInstrs); + bool optimizePhiTypes(Function &F); bool performAddressTypePromotion( Instruction *&Inst, bool AllowPromotionWithoutCommonHeader, @@ -515,6 +522,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { MadeChange |= mergeSExts(F); if (!LargeOffsetGEPMap.empty()) MadeChange |= splitLargeGEPOffsets(); + MadeChange |= optimizePhiTypes(F); if (MadeChange) eliminateFallThrough(F); @@ -5717,6 +5725,155 @@ 
bool CodeGenPrepare::splitLargeGEPOffsets() { return Changed; } +bool CodeGenPrepare::optimizePhiType( + PHINode *I, SmallPtrSetImpl &Visited, + SmallPtrSetImpl &DeletedInstrs) { + // We are looking for a collection on interconnected phi nodes that together + // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts + // are of the same type. Convert the whole set of nodes to the type of the + // bitcast. + Type *PhiTy = I->getType(); + Type *ConvertTy = nullptr; + if (Visited.count(I) || + (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy())) + return false; + + SmallVector Worklist; + Worklist.push_back(cast(I)); + SmallPtrSet PhiNodes; + PhiNodes.insert(I); + Visited.insert(I); + SmallPtrSet Defs; + SmallPtrSet Uses; + + while (!Worklist.empty()) { + Instruction *II = Worklist.pop_back_val(); + + if (auto *Phi = dyn_cast(II)) { + // Handle Defs, which might also be PHI's + for (Value *V : Phi->incoming_values()) { + if (auto *OpPhi = dyn_cast(V)) { + if (!PhiNodes.count(OpPhi)) { + if (Visited.count(OpPhi)) + return false; + PhiNodes.insert(OpPhi); + Visited.insert(OpPhi); + Worklist.push_back(OpPhi); + } + } else if (auto *OpLoad = dyn_cast(V)) { + if (!Defs.count(OpLoad)) { + Defs.insert(OpLoad); + Worklist.push_back(OpLoad); + } + } else if (auto *OpEx = dyn_cast(V)) { + if (!Defs.count(OpEx)) { + Defs.insert(OpEx); + Worklist.push_back(OpEx); + } + } else if (auto *OpBC = dyn_cast(V)) { + if (!ConvertTy) + ConvertTy = OpBC->getOperand(0)->getType(); + if (OpBC->getOperand(0)->getType() != ConvertTy) + return false; + if (!Defs.count(OpBC)) { + Defs.insert(OpBC); + Worklist.push_back(OpBC); + } + } else if (!isa(V)) + return false; + } + } + + // Handle uses which might also be phi's + for (User *V : II->users()) { + if (auto *OpPhi = dyn_cast(V)) { + if (!PhiNodes.count(OpPhi)) { + if (Visited.count(OpPhi)) + return false; + PhiNodes.insert(OpPhi); + Visited.insert(OpPhi); + Worklist.push_back(OpPhi); + } + } else if (auto 
*OpStore = dyn_cast(V)) { + if (OpStore->getOperand(0) != II) + return false; + Uses.insert(OpStore); + } else if (auto *OpBC = dyn_cast(V)) { + if (!ConvertTy) + ConvertTy = OpBC->getType(); + if (OpBC->getType() != ConvertTy) + return false; + Uses.insert(OpBC); + } else + return false; + } + } + + if (!ConvertTy || !TLI->shouldConvertPhiType(PhiTy, ConvertTy)) + return false; + + LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to " + << *ConvertTy << "\n"); + + // Create all the new phi nodes of the new type, and bitcast any loads to the + // correct type. + ValueToValueMap ValMap; + ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy); + for (Instruction *D : Defs) { + if (isa(D)) + ValMap[D] = D->getOperand(0); + else + ValMap[D] = + new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode()); + } + for (PHINode *Phi : PhiNodes) + ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(), + Phi->getName() + ".tc", Phi); + // Pipe together all the PhiNodes. + for (PHINode *Phi : PhiNodes) { + PHINode *NewPhi = cast(ValMap[Phi]); + for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++) + NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)], + Phi->getIncomingBlock(i)); + } + // And finally pipe up the stores and bitcasts + for (Instruction *U : Uses) { + if (isa(U)) { + DeletedInstrs.insert(U); + U->replaceAllUsesWith(ValMap[U->getOperand(0)]); + } else + U->setOperand(0, + new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U)); + } + + // Save the removed phis to be deleted later. + for (PHINode *Phi : PhiNodes) + DeletedInstrs.insert(Phi); + return true; +} + +bool CodeGenPrepare::optimizePhiTypes(Function &F) { + if (!OptimizePhiTypes) + return false; + + bool Changed = false; + SmallPtrSet Visited; + SmallPtrSet DeletedInstrs; + + // Attempt to optimize all the phis in the functions to the correct type. 
+ for (auto &BB : F) + for (auto &Phi : BB.phis()) + Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs); + + // Remove any old phi's that have been converted. + for (auto *I : DeletedInstrs) { + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } + + return Changed; +} + /// Return true, if an ext(load) can be formed from an extension in /// \p MovedExts. bool CodeGenPrepare::canFormExtLd( diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c91a72d..90de46b6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30748,6 +30748,12 @@ bool X86TargetLowering::shouldSinkOperands(Instruction *I, return false; } +bool X86TargetLowering::shouldConvertPhiType(Type *From, Type *To) const { + if (!Subtarget.is64Bit()) + return false; + return TargetLowering::shouldConvertPhiType(From, To); +} + bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { if (isa(ExtVal.getOperand(0))) return false; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 6f95e1b..560da44 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1187,6 +1187,7 @@ namespace llvm { bool shouldSinkOperands(Instruction *I, SmallVectorImpl &Ops) const override; + bool shouldConvertPhiType(Type *From, Type *To) const override; /// Return true if folding a vector load into ExtVal (a sign, zero, or any /// extend node) is profitable. 
diff --git a/llvm/test/CodeGen/AArch64/convertphitype.ll b/llvm/test/CodeGen/AArch64/convertphitype.ll index 7277ed1..bb82ea2 100644 --- a/llvm/test/CodeGen/AArch64/convertphitype.ll +++ b/llvm/test/CodeGen/AArch64/convertphitype.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare %s -S | FileCheck %s +; RUN: opt -codegenprepare -cgp-optimize-phi-types %s -S | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" @@ -11,14 +11,15 @@ define float @convphi1(i32 *%s, i32 *%d, i32 %n) { ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast i32 [[LD]] to float ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float -; CHECK-NEXT: ret float [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ] +; CHECK-NEXT: ret float [[PHI_TC]] ; entry: %cmp15 = icmp sgt i32 %n, 0 @@ -45,11 +46,11 @@ define float @convphi2(i32 *%s, i32 *%d, i32 %n) { ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ undef, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float -; CHECK-NEXT: ret float [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret float [[PHI_TC]] ; 
entry: %cmp15 = icmp sgt i32 %n, 0 @@ -73,11 +74,11 @@ define float @convphi3(i32 *%s, i32 *%d, i32 %n, float %f) { ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[FB]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float -; CHECK-NEXT: ret float [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret float [[PHI_TC]] ; entry: %cmp15 = icmp sgt i32 %n, 0 @@ -102,10 +103,12 @@ define void @convphi4(i32 *%s, i32 *%d, i32 %n, float %f) { ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[FB]], [[ENTRY:%.*]] ] -; CHECK-NEXT: store i32 [[PHI]], i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC:%.*]] = bitcast float [[PHI_TC]] to i32 +; CHECK-NEXT: store i32 [[BC]], i32* [[D:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -130,14 +133,15 @@ define i64 @convphi_d2i(double *%s, double *%d, i32 %n) { ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load double, double* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast double [[LS]] to i64 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: ; CHECK-NEXT: [[LD:%.*]] = load double, double* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast double [[LD]] to i64 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi double [ [[LS]], [[THEN]] ], [ 
[[LD]], [[ELSE]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast double [[PHI]] to i64 -; CHECK-NEXT: ret i64 [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi i64 [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ] +; CHECK-NEXT: ret i64 [[PHI_TC]] ; entry: %cmp15 = icmp sgt i32 %n, 0 @@ -164,14 +168,15 @@ define i32 @convphi_f2i(float *%s, float *%d, i32 %n) { ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load float, float* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast float [[LS]] to i32 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: ; CHECK-NEXT: [[LD:%.*]] = load float, float* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast float [[LD]] to i32 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast float [[PHI]] to i32 -; CHECK-NEXT: ret i32 [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi i32 [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ] +; CHECK-NEXT: ret i32 [[PHI_TC]] ; entry: %cmp15 = icmp sgt i32 %n, 0 @@ -198,14 +203,15 @@ define i16 @convphi_h2i(half *%s, half *%d, i32 %n) { ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load half, half* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast half [[LS]] to i16 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: ; CHECK-NEXT: [[LD:%.*]] = load half, half* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast half [[LD]] to i16 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi half [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast half [[PHI]] to i16 -; CHECK-NEXT: ret i16 [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi i16 [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ] +; CHECK-NEXT: ret i16 [[PHI_TC]] ; entry: %cmp15 = icmp sgt i32 %n, 0 @@ -232,14 +238,15 @@ define i128 @convphi_ld2i(fp128 *%s, fp128 *%d, i32 %n) 
{ ; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LS:%.*]] = load fp128, fp128* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast fp128 [[LS]] to i128 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: else: ; CHECK-NEXT: [[LD:%.*]] = load fp128, fp128* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast fp128 [[LD]] to i128 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi fp128 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast fp128 [[PHI]] to i128 -; CHECK-NEXT: ret i128 [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi i128 [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ] +; CHECK-NEXT: ret i128 [[PHI_TC]] ; entry: %cmp15 = icmp sgt i32 %n, 0 @@ -298,18 +305,19 @@ define float @convphi_loop(i32 *%s, i32 *%d, i64 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[N:%.*]], 0 ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP:%.*]], label [[END:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[LPHI:%.*]] = phi i32 [ [[LS]], [[ENTRY]] ], [ [[LD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[LD]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LPHI_TC:%.*]] = phi float [ [[LS_BC]], [[ENTRY]] ], [ [[LD_BC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC]] = bitcast i32 [[LD]] to float ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[LOOP]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[LPHI]], [[LOOP]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float -; CHECK-NEXT: ret float [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ undef, [[ENTRY]] ], [ 
[[LPHI_TC]], [[LOOP]] ] +; CHECK-NEXT: ret float [[PHI_TC]] ; entry: %cmp15 = icmp sgt i64 %n, 0 @@ -370,19 +378,20 @@ define float @convphi_loopdelayed2(i32 *%s, i32 *%d, i64 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[N:%.*]], 0 ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP:%.*]], label [[END:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[LPHI:%.*]] = phi i32 [ [[LS]], [[ENTRY]] ], [ [[LD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[LPHI2:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[LPHI]], [[LOOP]] ] -; CHECK-NEXT: [[LD]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LPHI_TC:%.*]] = phi float [ [[LS_BC]], [[ENTRY]] ], [ [[LD_BC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[LPHI2_TC:%.*]] = phi float [ undef, [[ENTRY]] ], [ [[LPHI_TC]], [[LOOP]] ] +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC]] = bitcast i32 [[LD]] to float ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[LOOP]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[LPHI2]], [[LOOP]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float -; CHECK-NEXT: ret float [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ undef, [[ENTRY]] ], [ [[LPHI2_TC]], [[LOOP]] ] +; CHECK-NEXT: ret float [[PHI_TC]] ; entry: %cmp15 = icmp sgt i64 %n, 0 @@ -409,31 +418,33 @@ define float @convphi_loopmore(i32 *%s, i32 *%d, i64 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 1 ; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: [[LS_BC:%.*]] = bitcast i32 [[LS]] to float ; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[IFEND:%.*]] ; CHECK: then: ; CHECK-NEXT: [[LD:%.*]] = 
load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: [[LD_BC:%.*]] = bitcast i32 [[LD]] to float ; CHECK-NEXT: br label [[IFEND]] ; CHECK: ifend: -; CHECK-NEXT: [[PHI1:%.*]] = phi i32 [ [[LD]], [[THEN]] ], [ [[LS]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[PHI1_TC:%.*]] = phi float [ [[LD_BC]], [[THEN]] ], [ [[LS_BC]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[N]], 0 ; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP:%.*]], label [[END:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[IFEND]] ], [ [[IV_NEXT:%.*]], [[LOOPEND:%.*]] ] -; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ [[PHI1]], [[IFEND]] ], [ [[PHI3:%.*]], [[LOOPEND]] ] +; CHECK-NEXT: [[PHI2_TC:%.*]] = phi float [ [[PHI1_TC]], [[IFEND]] ], [ [[PHI3_TC:%.*]], [[LOOPEND]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N]], 1 ; CHECK-NEXT: br i1 [[TMP0]], label [[LOOPTHEN:%.*]], label [[LOOPEND]] ; CHECK: loopthen: ; CHECK-NEXT: [[LL:%.*]] = load i32, i32* [[D]], align 4 +; CHECK-NEXT: [[LL_BC:%.*]] = bitcast i32 [[LL]] to float ; CHECK-NEXT: br label [[LOOPEND]] ; CHECK: loopend: -; CHECK-NEXT: [[PHI3]] = phi i32 [ [[LL]], [[LOOPTHEN]] ], [ [[PHI2]], [[LOOP]] ] +; CHECK-NEXT: [[PHI3_TC]] = phi float [ [[LL_BC]], [[LOOPTHEN]] ], [ [[PHI2_TC]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[LOOP]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[PHI1]], [[IFEND]] ], [ [[PHI3]], [[LOOPEND]] ] -; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float -; CHECK-NEXT: ret float [[B]] +; CHECK-NEXT: [[PHI_TC:%.*]] = phi float [ [[PHI1_TC]], [[IFEND]] ], [ [[PHI3_TC]], [[LOOPEND]] ] +; CHECK-NEXT: ret float [[PHI_TC]] ; entry: %cmp = icmp eq i64 %n, 1 diff --git a/llvm/test/CodeGen/X86/convertphitype.ll b/llvm/test/CodeGen/X86/convertphitype.ll new file mode 100644 index 0000000..cd323ac --- /dev/null +++ b/llvm/test/CodeGen/X86/convertphitype.ll @@ -0,0 +1,39 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -codegenprepare -cgp-optimize-phi-types=true %s -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +define float @convphi1(i32 *%s, i32 *%d, i32 %n) { +; CHECK-LABEL: @convphi1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: else: +; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ] +; CHECK-NEXT: [[B:%.*]] = bitcast i32 [[PHI]] to float +; CHECK-NEXT: ret float [[B]] +; +entry: + %cmp15 = icmp sgt i32 %n, 0 + br i1 %cmp15, label %then, label %else + +then: + %ls = load i32, i32* %s, align 4 + br label %end + +else: + %ld = load i32, i32* %d, align 4 + br label %end + +end: + %phi = phi i32 [ %ls, %then ], [ %ld, %else ] + %b = bitcast i32 %phi to float + ret float %b +}