From: Nikita Popov <nikita.ppv@gmail.com>
Date: Thu, 20 Feb 2020 21:42:42 +0000 (+0100)
Subject: [InstCombine] DCE instructions earlier
X-Git-Tag: llvmorg-12-init~13455
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4ef272ec9c5f737b8d63a3cc8870d8d63bf0385e;p=platform%2Fupstream%2Fllvm.git

[InstCombine] DCE instructions earlier

When InstCombine initially populates the worklist, it already
performs constant folding and DCE. However, as the instructions
are initially visited in program order, this DCE can pick up only
the last instruction of a dead chain; the rest would only get
picked up in the main InstCombine run.

To avoid this, we instead perform the DCE in a separate pass over the
collected instructions in reverse order, which will allow us to
pick up full dead instruction chains. We already need to do this
reverse iteration anyway to populate the worklist, so this
shouldn't add extra cost.

This by itself only fixes a small part of the problem though:
The same basic issue also applies during the main InstCombine loop.
We generally always want DCE to occur as early as possible,
because it will allow one-use folds to happen. Address this by also
performing DCE while adding deferred instructions to the main worklist.

This drops the number of tests that perform more than 2 InstCombine
iterations from ~80 to ~40. There are some spurious test changes due
to operand order / icmp toggling.

Differential Revision: https://reviews.llvm.org/D75008
---

diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
index f019bb5..25aabe1 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
@@ -38,7 +38,7 @@ public:
   InstCombineWorklist(InstCombineWorklist &&) = default;
   InstCombineWorklist &operator=(InstCombineWorklist &&) = default;
 
-  bool isEmpty() const { return Worklist.empty(); }
+  bool isEmpty() const { return Worklist.empty() && Deferred.empty(); }
 
   /// Add instruction to the worklist.
   /// Instructions will be visited in the order they are added.
@@ -72,26 +72,15 @@ public:
       push(I);
   }
 
-  void addDeferredInstructions() {
-    for (Instruction *I : reverse(Deferred))
-      push(I);
-    Deferred.clear();
+  Instruction *popDeferred() {
+    if (Deferred.empty())
+      return nullptr;
+    return Deferred.pop_back_val();
   }
 
-  /// AddInitialGroup - Add the specified batch of stuff in reverse order.
-  /// which should only be done when the worklist is empty and when the group
-  /// has no duplicates.
-  void addInitialGroup(ArrayRef<Instruction *> List) {
-    assert(Worklist.empty() && "Worklist must be empty to add initial group");
-    Worklist.reserve(List.size()+16);
-    WorklistMap.reserve(List.size());
-    LLVM_DEBUG(dbgs() << "IC: ADDING: " << List.size()
-                      << " instrs to worklist\n");
-    unsigned Idx = 0;
-    for (Instruction *I : reverse(List)) {
-      WorklistMap.insert(std::make_pair(I, Idx++));
-      Worklist.push_back(I);
-    }
+  void reserve(size_t Size) {
+    Worklist.reserve(Size + 16);
+    WorklistMap.reserve(Size);
   }
 
   /// Remove I from the worklist if it exists.
@@ -107,6 +96,8 @@ public:
   }
 
   Instruction *removeOne() {
+    if (Worklist.empty())
+      return nullptr;
     Instruction *I = Worklist.pop_back_val();
     WorklistMap.erase(I);
     return I;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index f80da43..38ba742 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -724,7 +724,7 @@ public:
     if (I.getNumOperands() < 8) {
       for (Use &Operand : I.operands())
         if (auto *Inst = dyn_cast<Instruction>(Operand))
-          Worklist.push(Inst);
+          Worklist.add(Inst);
     }
     Worklist.remove(&I);
     I.eraseFromParent();
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index e25dfde..58d49d8 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3406,6 +3406,22 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
 
 bool InstCombiner::run() {
   while (!Worklist.isEmpty()) {
+    // Walk deferred instructions in reverse order, and push them to the
+    // worklist, which means they'll end up popped from the worklist in-order.
+    while (Instruction *I = Worklist.popDeferred()) {
+      // Check to see if we can DCE the instruction. We do this already here to
+      // reduce the number of uses and thus allow other folds to trigger.
+      // Note that eraseInstFromFunction() may push additional instructions on
+      // the deferred worklist, so this will DCE whole instruction chains.
+      if (isInstructionTriviallyDead(I, &TLI)) {
+        eraseInstFromFunction(*I);
+        ++NumDeadInst;
+        continue;
+      }
+
+      Worklist.push(I);
+    }
+
     Instruction *I = Worklist.removeOne();
     if (I == nullptr) continue;  // skip null values.
 
@@ -3552,7 +3568,6 @@ bool InstCombiner::run() {
       }
       MadeIRChange = true;
     }
-    Worklist.addDeferredInstructions();
   }
 
   Worklist.zap();
@@ -3588,16 +3603,6 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
     for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
       Instruction *Inst = &*BBI++;
 
-      // DCE instruction if trivially dead.
-      if (isInstructionTriviallyDead(Inst, TLI)) {
-        ++NumDeadInst;
-        LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
-        salvageDebugInfoOrMarkUndef(*Inst);
-        Inst->eraseFromParent();
-        MadeIRChange = true;
-        continue;
-      }
-
       // ConstantProp instruction if trivially constant.
       if (!Inst->use_empty() &&
           (Inst->getNumOperands() == 0 || isa<Constant>(Inst->getOperand(0))))
@@ -3665,7 +3670,21 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
   // of the function down.  This jives well with the way that it adds all uses
   // of instructions to the worklist after doing a transformation, thus avoiding
   // some N^2 behavior in pathological cases.
-  ICWorklist.addInitialGroup(InstrsForInstCombineWorklist);
+  ICWorklist.reserve(InstrsForInstCombineWorklist.size());
+  for (Instruction *Inst : reverse(InstrsForInstCombineWorklist)) {
+    // DCE instruction if trivially dead. As we iterate in reverse program
+    // order here, we will clean up whole chains of dead instructions.
+    if (isInstructionTriviallyDead(Inst, TLI)) {
+      ++NumDeadInst;
+      LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
+      salvageDebugInfoOrMarkUndef(*Inst);
+      Inst->eraseFromParent();
+      MadeIRChange = true;
+      continue;
+    }
+
+    ICWorklist.push(Inst);
+  }
 
   return MadeIRChange;
 }
diff --git a/llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll b/llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
index ed32bb1..169bf70 100644
--- a/llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
+++ b/llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine -instcombine-infinite-loop-threshold=3 -S < %s | FileCheck %s
+; RUN: opt -instcombine -instcombine-infinite-loop-threshold=2 -S < %s | FileCheck %s
 
 ; <rdar://problem/8606771>
 define i32 @main(i32 %argc) {
diff --git a/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll b/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
index c378033..8d03a55 100644
--- a/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
+++ b/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
@@ -24,7 +24,7 @@ define i1 @positive_easyinvert(i16 %x, i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[X:%.*]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %tmp1 = icmp slt i16 %x, 0
@@ -38,7 +38,7 @@ define i1 @positive_easyinvert0(i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %tmp1 = call i1 @gen1()
@@ -52,7 +52,7 @@ define i1 @positive_easyinvert1(i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %tmp1 = call i1 @gen1()
diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll
index ac14fce..e14e2bf 100644
--- a/llvm/test/Transforms/InstCombine/logical-select.ll
+++ b/llvm/test/Transforms/InstCombine/logical-select.ll
@@ -535,8 +535,8 @@ define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c)
 
 define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
 ; CHECK-LABEL: @allSignBits(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[COND:%.*]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TVAL:%.*]], i32 [[FVAL:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[COND:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[FVAL:%.*]], i32 [[TVAL:%.*]]
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %bitmask = ashr i32 %cond, 31
@@ -549,8 +549,8 @@ define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
 
 define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
 ; CHECK-LABEL: @allSignBits_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TVAL:%.*]], <4 x i8> [[FVAL:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
 ; CHECK-NEXT:    ret <4 x i8> [[TMP2]]
 ;
   %bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
diff --git a/llvm/test/Transforms/InstCombine/pr44245.ll b/llvm/test/Transforms/InstCombine/pr44245.ll
index 15e4df2..f7eb806 100644
--- a/llvm/test/Transforms/InstCombine/pr44245.ll
+++ b/llvm/test/Transforms/InstCombine/pr44245.ll
@@ -159,9 +159,9 @@ define void @test_2(i1 %c) local_unnamed_addr {
 ; CHECK:       cond.true133:
 ; CHECK-NEXT:    br label [[COND_END144:%.*]]
 ; CHECK:       cond.false138:
+; CHECK-NEXT:    store %type_2* undef, %type_2** null, align 536870912
 ; CHECK-NEXT:    br label [[COND_END144]]
 ; CHECK:       cond.end144:
-; CHECK-NEXT:    store %type_3* undef, %type_3** null, align 536870912
 ; CHECK-NEXT:    br label [[WHILE_COND]]
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/select-imm-canon.ll b/llvm/test/Transforms/InstCombine/select-imm-canon.ll
index 4c59be6..272d4a4 100644
--- a/llvm/test/Transforms/InstCombine/select-imm-canon.ll
+++ b/llvm/test/Transforms/InstCombine/select-imm-canon.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -instcombine-infinite-loop-threshold=3 -S | FileCheck %s
+; RUN: opt < %s -instcombine -instcombine-infinite-loop-threshold=2 -S | FileCheck %s
 
 define i8 @single(i32 %A) {
 ; CHECK-LABEL: @single(
diff --git a/llvm/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
index 6cbb252..66609f1 100644
--- a/llvm/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
@@ -12,7 +12,7 @@
 
 define i8 @sub_ashr_and_i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 0
 ; CHECK-NEXT:    ret i8 [[AND]]
 ;
@@ -24,7 +24,7 @@ define i8 @sub_ashr_and_i8(i8 %x, i8 %y) {
 
 define i16 @sub_ashr_and_i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i16 [[X]], i16 0
 ; CHECK-NEXT:    ret i16 [[AND]]
 ;
@@ -37,7 +37,7 @@ define i16 @sub_ashr_and_i16(i16 %x, i16 %y) {
 
 define i32 @sub_ashr_and_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
@@ -49,7 +49,7 @@ define i32 @sub_ashr_and_i32(i32 %x, i32 %y) {
 
 define i64 @sub_ashr_and_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i64 [[X]], i64 0
 ; CHECK-NEXT:    ret i64 [[AND]]
 ;
@@ -63,7 +63,7 @@ define i64 @sub_ashr_and_i64(i64 %x, i64 %y) {
 
 define i32 @sub_ashr_and_i32_nuw_nsw(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
@@ -77,7 +77,7 @@ define i32 @sub_ashr_and_i32_nuw_nsw(i32 %x, i32 %y) {
 
 define i32 @sub_ashr_and_i32_commute(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
@@ -91,7 +91,7 @@ define i32 @sub_ashr_and_i32_commute(i32 %x, i32 %y) {
 
 define <4 x i32> @sub_ashr_and_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[AND]]
 ;
@@ -103,7 +103,7 @@ define <4 x i32> @sub_ashr_and_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @sub_ashr_and_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_vec_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[AND]]
 ;
@@ -115,7 +115,7 @@ define <4 x i32> @sub_ashr_and_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @sub_ashr_and_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_vec_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[AND]]
 ;
@@ -144,7 +144,7 @@ define i32 @sub_ashr_and_i32_extra_use_sub(i32 %x, i32 %y, i32* %p) {
 
 define i32 @sub_ashr_and_i32_extra_use_and(i32 %x, i32 %y, i32* %p) {
 ; CHECK-LABEL: @sub_ashr_and_i32_extra_use_and(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    store i32 [[AND]], i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    ret i32 [[AND]]
diff --git a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
index 6289e8f..937bff6 100644
--- a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
@@ -26,7 +26,7 @@ define i32 @clamp255_i32(i32 %x) {
 
 define i8 @sub_ashr_or_i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i8 -1, i8 [[X]]
 ; CHECK-NEXT:    ret i8 [[OR]]
 ;
@@ -38,7 +38,7 @@ define i8 @sub_ashr_or_i8(i8 %x, i8 %y) {
 
 define i16 @sub_ashr_or_i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i16 -1, i16 [[X]]
 ; CHECK-NEXT:    ret i16 [[OR]]
 ;
@@ -50,7 +50,7 @@ define i16 @sub_ashr_or_i16(i16 %x, i16 %y) {
 
 define i32 @sub_ashr_or_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@@ -62,7 +62,7 @@ define i32 @sub_ashr_or_i32(i32 %x, i32 %y) {
 
 define i64 @sub_ashr_or_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i64 -1, i64 [[X]]
 ; CHECK-NEXT:    ret i64 [[OR]]
 ;
@@ -76,7 +76,7 @@ define i64 @sub_ashr_or_i64(i64 %x, i64 %y) {
 
 define i32 @sub_ashr_or_i32_nuw_nsw(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@@ -90,7 +90,7 @@ define i32 @sub_ashr_or_i32_nuw_nsw(i32 %x, i32 %y) {
 
 define i32 @sub_ashr_or_i32_commute(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@@ -104,7 +104,7 @@ define i32 @sub_ashr_or_i32_commute(i32 %x, i32 %y) {
 
 define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@@ -116,7 +116,7 @@ define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@@ -128,7 +128,7 @@ define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @sub_ashr_or_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@@ -157,7 +157,7 @@ define i32 @sub_ashr_or_i32_extra_use_sub(i32 %x, i32 %y, i32* %p) {
 
 define i32 @sub_ashr_or_i32_extra_use_or(i32 %x, i32 %y, i32* %p) {
 ; CHECK-LABEL: @sub_ashr_or_i32_extra_use_or(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    store i32 [[OR]], i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    ret i32 [[OR]]
diff --git a/llvm/test/Transforms/InstCombine/vec_sext.ll b/llvm/test/Transforms/InstCombine/vec_sext.ll
index e0e263b..39bd408 100644
--- a/llvm/test/Transforms/InstCombine/vec_sext.ll
+++ b/llvm/test/Transforms/InstCombine/vec_sext.ll
@@ -4,8 +4,8 @@
 define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @vec_select(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %cmp = icmp slt <4 x i32> %b, zeroinitializer
@@ -23,8 +23,8 @@ define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
 define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @vec_select_alternate_sign_bit_test(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %cmp = icmp sgt <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
index 5ad0e13..e9a3e60 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-cond-stores.ll
@@ -77,9 +77,9 @@ end:
 define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-LABEL: @test_recursive(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[X4:%.*]] = icmp eq i32 [[D:%.*]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[C:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[TMP0]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[TMP0]], [[C:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[X4]], true
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]