From 03de1cb715c9a106f3ea4139042a33afcd7ca6ae Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 9 Jun 2023 16:21:39 +0200 Subject: [PATCH] [InstCombine][CGP] Move swapMayExposeCSEOpportunities() fold InstCombine tries to swap compare operands to match sub instructions in order to expose "CSE opportunities". However, it doesn't really make sense to perform this transform in the middle-end, as we cannot actually CSE the instructions there. The backend already performs this fold in https://github.com/llvm/llvm-project/blob/18f5446a45da5a61dbfb1b7667d27fb441ac62db/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp#L4236 on the SDAG level, however this only works within a single basic block. To handle cross-BB cases, we do need to handle this in the IR layer. This patch moves the fold from InstCombine to CGP in the backend, while keeping the same (somewhat dubious) heuristic. Differential Revision: https://reviews.llvm.org/D152541 --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 34 +++++++ .../Transforms/InstCombine/InstCombineCompares.cpp | 34 +------ llvm/test/CodeGen/Lanai/sub-cmp-peephole.ll | 8 +- llvm/test/CodeGen/RISCV/aext-to-sext.ll | 2 +- .../LowOverheadLoops/varying-outer-2d-reduction.ll | 8 +- llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll | 4 +- llvm/test/CodeGen/X86/cmp-merge.ll | 15 ++- llvm/test/CodeGen/X86/jump_sign.ll | 3 +- llvm/test/Transforms/InstCombine/abs-intrinsic.ll | 4 +- llvm/test/Transforms/InstCombine/icmp.ll | 103 --------------------- .../result-of-usub-is-non-zero-and-no-overflow.ll | 28 +++--- ...nderflow-check-to-comparison-of-sub-operands.ll | 4 +- .../InstCombine/sub-ashr-or-to-icmp-select.ll | 2 +- .../unsigned-sub-lack-of-overflow-check.ll | 2 +- .../InstCombine/unsigned-sub-overflow-check.ll | 2 +- 15 files changed, 74 insertions(+), 179 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 06e9c2e..ee4c193 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ 
b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1831,6 +1831,37 @@ static bool foldICmpWithDominatingICmp(CmpInst *Cmp, return true; } +/// Many architectures use the same instruction for both subtract and cmp. Try +/// to swap cmp operands to match subtract operations to allow for CSE. +static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) { + Value *Op0 = Cmp->getOperand(0); + Value *Op1 = Cmp->getOperand(1); + if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) || + isa<Constant>(Op1)) + return false; + + // If a subtract already has the same operands as a compare, swapping would be + // bad. If a subtract has the same operands as a compare but in reverse order, + // then swapping is good. + int GoodToSwap = 0; + unsigned NumInspected = 0; + for (const User *U : Op0->users()) { + // Avoid walking many users. + if (++NumInspected > 128) + return false; + if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0)))) + GoodToSwap++; + else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1)))) + GoodToSwap--; + } + + if (GoodToSwap > 0) { + Cmp->swapOperands(); + return true; + } + return false; +} + bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) { if (sinkCmpExpression(Cmp, *TLI)) return true; @@ -1844,6 +1875,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) { if (foldICmpWithDominatingICmp(Cmp, *TLI)) return true; + if (swapICmpOperandsToExposeCSEOpportunities(Cmp)) + return true; + return false; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index e989510..95e34bf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5637,37 +5637,6 @@ static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) { } } -/// Check if the order of \p Op0 and \p Op1 as operands in an ICmpInst -/// should be swapped. 
-/// The decision is based on how many times these two operands are reused -/// as subtract operands and their positions in those instructions. -/// The rationale is that several architectures use the same instruction for -/// both subtract and cmp. Thus, it is better if the order of those operands -/// match. -/// TODO: Shouldn't this be part of CGP instead? -/// \return true if Op0 and Op1 should be swapped. -static bool swapMayExposeCSEOpportunities(const Value *Op0, const Value *Op1) { - // Filter out pointer values as those cannot appear directly in subtract. - // FIXME: we may want to go through inttoptrs or bitcasts. - if (Op0->getType()->isPointerTy() || isa<Constant>(Op0)) - return false; - // If a subtract already has the same operands as a compare, swapping would be - // bad. If a subtract has the same operands as a compare but in reverse order, - // then swapping is good. - int GoodToSwap = 0; - unsigned NumInspected = 0; - for (const User *U : Op0->users()) { - // Avoid walking many users. - if (++NumInspected > 128) - return false; - if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0)))) - GoodToSwap++; - else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1)))) - GoodToSwap--; - } - return GoodToSwap > 0; -} - /// Check that one use is in the same block as the definition and all /// other uses are in blocks dominated by a given block. /// @@ -6517,8 +6486,7 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. 
- if (Op0Cplxity < Op1Cplxity || - (Op0Cplxity == Op1Cplxity && swapMayExposeCSEOpportunities(Op0, Op1))) { + if (Op0Cplxity < Op1Cplxity) { I.swapOperands(); std::swap(Op0, Op1); Changed = true; diff --git a/llvm/test/CodeGen/Lanai/sub-cmp-peephole.ll b/llvm/test/CodeGen/Lanai/sub-cmp-peephole.ll index fec23f6..84d23e4 100644 --- a/llvm/test/CodeGen/Lanai/sub-cmp-peephole.ll +++ b/llvm/test/CodeGen/Lanai/sub-cmp-peephole.ll @@ -75,12 +75,12 @@ define i32 @j(i32 inreg %a, i32 inreg %b) nounwind { ; CHECK: ! %bb.0: ! %entry ; CHECK-NEXT: st %fp, [--%sp] ; CHECK-NEXT: add %sp, 0x8, %fp -; CHECK-NEXT: sub.f %r6, %r7, %rv -; CHECK-NEXT: bne .LBB4_2 ; CHECK-NEXT: sub %sp, 0x8, %sp +; CHECK-NEXT: sub.f %r6, %r7, %r0 +; CHECK-NEXT: bne .LBB4_2 +; CHECK-NEXT: sub %r6, %r7, %rv ; CHECK-NEXT: .LBB4_1: ! %if.then -; CHECK-NEXT: sub.f %r7, %r6, %r0 -; CHECK-NEXT: sel.gt %rv, %r6, %rv +; CHECK-NEXT: sel.lt %rv, %r6, %rv ; CHECK-NEXT: .LBB4_2: ! %if.else ; CHECK-NEXT: ld -4[%fp], %pc ! return ; CHECK-NEXT: add %fp, 0x0, %sp diff --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll index 0aa04f4..6322f84 100644 --- a/llvm/test/CodeGen/RISCV/aext-to-sext.ll +++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll @@ -11,7 +11,7 @@ define void @quux(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-LABEL: quux: ; RV64I: # %bb.0: # %bb -; RV64I-NEXT: beq a0, a1, .LBB0_4 +; RV64I-NEXT: beq a1, a0, .LBB0_4 ; RV64I-NEXT: # %bb.1: # %bb2.preheader ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll index d143cad..07c06e1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll @@ -45,8 +45,8 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly 
%Input, ; ENABLED-NEXT: .LBB0_4: @ %for.body ; ENABLED-NEXT: @ =>This Loop Header: Depth=1 ; ENABLED-NEXT: @ Child Loop BB0_6 Depth 2 -; ENABLED-NEXT: cmp r8, r2 -; ENABLED-NEXT: bge .LBB0_2 +; ENABLED-NEXT: cmp r2, r8 +; ENABLED-NEXT: ble .LBB0_2 ; ENABLED-NEXT: @ %bb.5: @ %vector.ph ; ENABLED-NEXT: @ in Loop: Header=BB0_4 Depth=1 ; ENABLED-NEXT: bic r0, r9, #3 @@ -116,8 +116,8 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input, ; NOREDUCTIONS-NEXT: .LBB0_4: @ %for.body ; NOREDUCTIONS-NEXT: @ =>This Loop Header: Depth=1 ; NOREDUCTIONS-NEXT: @ Child Loop BB0_6 Depth 2 -; NOREDUCTIONS-NEXT: cmp r8, r2 -; NOREDUCTIONS-NEXT: bge .LBB0_2 +; NOREDUCTIONS-NEXT: cmp r2, r8 +; NOREDUCTIONS-NEXT: ble .LBB0_2 ; NOREDUCTIONS-NEXT: @ %bb.5: @ %vector.ph ; NOREDUCTIONS-NEXT: @ in Loop: Header=BB0_4 Depth=1 ; NOREDUCTIONS-NEXT: bic r0, r9, #3 diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll index 7594c18..2aa183c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -725,7 +725,7 @@ define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 ; CHECK-NEXT: cmp r1, r1 ; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: ldr.w r12, [r0, r11, lsl #2] -; CHECK-NEXT: bge .LBB5_3 +; CHECK-NEXT: ble .LBB5_3 ; CHECK-NEXT: @ %bb.6: @ %for.body24.preheader ; CHECK-NEXT: @ in Loop: Header=BB5_5 Depth=1 ; CHECK-NEXT: ldr.w lr, [sp, #92] @@ -897,7 +897,7 @@ define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_ ; CHECK-NEXT: cmp r1, r1 ; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: ldr.w r12, [r0, r11, lsl #2] -; CHECK-NEXT: bge .LBB6_6 +; CHECK-NEXT: ble .LBB6_6 ; CHECK-NEXT: @ %bb.4: @ %for.body24.preheader ; CHECK-NEXT: @ in Loop: Header=BB6_3 Depth=1 ; CHECK-NEXT: ldr.w lr, [sp, #92] diff --git a/llvm/test/CodeGen/X86/cmp-merge.ll b/llvm/test/CodeGen/X86/cmp-merge.ll index 
7528eab..0c355af 100644 --- a/llvm/test/CodeGen/X86/cmp-merge.ll +++ b/llvm/test/CodeGen/X86/cmp-merge.ll @@ -122,22 +122,19 @@ define void @cmp_sub_different_order(i32 %x, i32 %y, ptr %p) { ; X86-LABEL: cmp_sub_different_order: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: jle .LBB3_2 +; X86-NEXT: subl {{[0-9]+}}(%esp), %eax +; X86-NEXT: jge .LBB3_2 ; X86-NEXT: # %bb.1: # %cond.true -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: subl %eax, %ecx -; X86-NEXT: movl %ecx, (%edx) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %eax, (%ecx) ; X86-NEXT: .LBB3_2: # %cond.end ; X86-NEXT: retl ; ; X64-LABEL: cmp_sub_different_order: ; X64: # %bb.0: # %entry -; X64-NEXT: cmpl %edi, %esi -; X64-NEXT: jle .LBB3_2 -; X64-NEXT: # %bb.1: # %cond.true ; X64-NEXT: subl %esi, %edi +; X64-NEXT: jge .LBB3_2 +; X64-NEXT: # %bb.1: # %cond.true ; X64-NEXT: movl %edi, (%rdx) ; X64-NEXT: .LBB3_2: # %cond.end ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll index de7aaf2..9eaa654 100644 --- a/llvm/test/CodeGen/X86/jump_sign.ll +++ b/llvm/test/CodeGen/X86/jump_sign.ll @@ -134,10 +134,9 @@ define i32 @func_m(i32 %a, i32 %b) nounwind { define i32 @func_l2(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: func_l2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: subl %edx, %ecx +; CHECK-NEXT: subl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: jne .LBB8_2 ; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: cmovll %ecx, %eax diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index ebaf866..a761f50 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -455,7 +455,7 @@ cond.end: define i32 @sub_abs_lt(i32 %x, i32 %y) { ; 
CHECK-LABEL: @sub_abs_lt( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]] ; CHECK: cond.true: ; CHECK-NEXT: [[SUB_NEG:%.*]] = sub i32 [[Y]], [[X]] @@ -481,7 +481,7 @@ cond.end: define i32 @sub_abs_lt_min_not_poison(i32 %x, i32 %y) { ; CHECK-LABEL: @sub_abs_lt_min_not_poison( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]] ; CHECK: cond.true: ; CHECK-NEXT: [[SUB_NEG:%.*]] = sub i32 [[Y]], [[X]] diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index ec9ac67..7fd7b86 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -2965,109 +2965,6 @@ define i1 @xor_ugt_extra_use(i8 %x, ptr %p) { ret i1 %r } -define i1 @icmp_swap_operands_for_cse(i32 %X, i32 %Y) { -; CHECK-LABEL: @icmp_swap_operands_for_cse( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X]], [[Y]] -; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] -; CHECK: true: -; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[SUB]], 1 -; CHECK-NEXT: br label [[END:%.*]] -; CHECK: false: -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUB]], 16 -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: [[RES_IN:%.*]] = phi i32 [ [[TMP0]], [[TRUE]] ], [ [[TMP1]], [[FALSE]] ] -; CHECK-NEXT: [[RES:%.*]] = icmp ne i32 [[RES_IN]], 0 -; CHECK-NEXT: ret i1 [[RES]] -; -entry: - %sub = sub i32 %X, %Y - %cmp = icmp ugt i32 %Y, %X - br i1 %cmp, label %true, label %false -true: - %restrue = trunc i32 %sub to i1 - br label %end -false: - %shift = lshr i32 %sub, 4 - %resfalse = trunc i32 %shift to i1 - br label %end 
-end: - %res = phi i1 [%restrue, %true], [%resfalse, %false] - ret i1 %res -} - -define i1 @icmp_swap_operands_for_cse2(i32 %X, i32 %Y) { -; CHECK-LABEL: @icmp_swap_operands_for_cse2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] -; CHECK: true: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X]], [[Y]] -; CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[X]], [[Y]] -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] -; CHECK-NEXT: br label [[END:%.*]] -; CHECK: false: -; CHECK-NEXT: [[SUB2:%.*]] = sub i32 [[Y]], [[X]] -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: [[RES_IN_IN:%.*]] = phi i32 [ [[ADD]], [[TRUE]] ], [ [[SUB2]], [[FALSE]] ] -; CHECK-NEXT: [[RES_IN:%.*]] = and i32 [[RES_IN_IN]], 1 -; CHECK-NEXT: [[RES:%.*]] = icmp ne i32 [[RES_IN]], 0 -; CHECK-NEXT: ret i1 [[RES]] -; -entry: - %cmp = icmp ugt i32 %Y, %X - br i1 %cmp, label %true, label %false -true: - %sub = sub i32 %X, %Y - %sub1 = sub i32 %X, %Y - %add = add i32 %sub, %sub1 - %restrue = trunc i32 %add to i1 - br label %end -false: - %sub2 = sub i32 %Y, %X - %resfalse = trunc i32 %sub2 to i1 - br label %end -end: - %res = phi i1 [%restrue, %true], [%resfalse, %false] - ret i1 %res -} - -define i1 @icmp_do_not_swap_operands_for_cse(i32 %X, i32 %Y) { -; CHECK-LABEL: @icmp_do_not_swap_operands_for_cse( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]] -; CHECK: true: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X]], [[Y]] -; CHECK-NEXT: br label [[END:%.*]] -; CHECK: false: -; CHECK-NEXT: [[SUB2:%.*]] = sub i32 [[Y]], [[X]] -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: [[RES_IN_IN:%.*]] = phi i32 [ [[SUB]], [[TRUE]] ], [ [[SUB2]], [[FALSE]] ] -; CHECK-NEXT: [[RES_IN:%.*]] = and i32 [[RES_IN_IN]], 1 -; CHECK-NEXT: [[RES:%.*]] = icmp ne i32 [[RES_IN]], 0 -; CHECK-NEXT: ret i1 [[RES]] -; 
-entry: - %cmp = icmp ugt i32 %Y, %X - br i1 %cmp, label %true, label %false -true: - %sub = sub i32 %X, %Y - %restrue = trunc i32 %sub to i1 - br label %end -false: - %sub2 = sub i32 %Y, %X - %resfalse = trunc i32 %sub2 to i1 - br label %end -end: - %res = phi i1 [%restrue, %true], [%resfalse, %false] - ret i1 %res -} - define i1 @icmp_lshr_lshr_eq(i32 %a, i32 %b) { ; CHECK-LABEL: @icmp_lshr_lshr_eq( ; CHECK-NEXT: [[Z_UNSHIFTED:%.*]] = xor i32 [[A:%.*]], [[B:%.*]] diff --git a/llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll b/llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll index 2f845f5..a8be818 100644 --- a/llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll +++ b/llvm/test/Transforms/InstCombine/result-of-usub-is-non-zero-and-no-overflow.ll @@ -20,11 +20,11 @@ define i1 @t0_noncanonical_ignoreme(i8 %base, i8 %offset) { ; CHECK-LABEL: @t0_noncanonical_ignoreme( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ule i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) ; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[BASE]], [[OFFSET]] ; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: ret i1 [[R]] ; %adjusted = sub i8 %base, %offset @@ -41,11 +41,11 @@ define i1 @t0_noncanonical_ignoreme_logical(i8 %base, i8 %offset) { ; CHECK-LABEL: @t0_noncanonical_ignoreme_logical( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ule i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: call void @use1(i1 
[[NO_UNDERFLOW]]) ; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[BASE]], [[OFFSET]] ; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: ret i1 [[R]] ; %adjusted = sub i8 %base, %offset @@ -199,11 +199,11 @@ define i1 @t3_commutability0(i8 %base, i8 %offset) { ; CHECK-LABEL: @t3_commutability0( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ule i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) ; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[BASE]], [[OFFSET]] ; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: ret i1 [[R]] ; %adjusted = sub i8 %base, %offset @@ -220,11 +220,11 @@ define i1 @t3_commutability0_logical(i8 %base, i8 %offset) { ; CHECK-LABEL: @t3_commutability0_logical( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ule i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) ; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[BASE]], [[OFFSET]] ; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) -; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: ret i1 [[R]] ; %adjusted = sub i8 %base, %offset @@ -281,7 +281,7 @@ define i1 @t5_commutability2(i8 %base, i8 %offset) { ; CHECK-LABEL: @t5_commutability2( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: 
[[NO_UNDERFLOW:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ule i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) ; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[BASE]], [[OFFSET]] ; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) @@ -302,7 +302,7 @@ define i1 @t5_commutability2_logical(i8 %base, i8 %offset) { ; CHECK-LABEL: @t5_commutability2_logical( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[NO_UNDERFLOW:%.*]] = icmp ule i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: call void @use1(i1 [[NO_UNDERFLOW]]) ; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i8 [[BASE]], [[OFFSET]] ; CHECK-NEXT: call void @use1(i1 [[NOT_NULL]]) @@ -509,11 +509,11 @@ define i1 @t9_commutative(i8 %base, i8 %offset) { ; CHECK-LABEL: @t9_commutative( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: [[UNDERFLOW:%.*]] = icmp ult i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[UNDERFLOW:%.*]] = icmp ugt i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: call void @use1(i1 [[UNDERFLOW]]) ; CHECK-NEXT: [[NULL:%.*]] = icmp eq i8 [[BASE]], [[OFFSET]] ; CHECK-NEXT: call void @use1(i1 [[NULL]]) -; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: ret i1 [[R]] ; %adjusted = sub i8 %base, %offset @@ -530,11 +530,11 @@ define i1 @t9_commutative_logical(i8 %base, i8 %offset) { ; CHECK-LABEL: @t9_commutative_logical( ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET:%.*]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: [[UNDERFLOW:%.*]] = icmp ult i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[UNDERFLOW:%.*]] = icmp ugt i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: call void @use1(i1 [[UNDERFLOW]]) ; CHECK-NEXT: [[NULL:%.*]] = icmp eq i8 [[BASE]], 
[[OFFSET]] ; CHECK-NEXT: call void @use1(i1 [[NULL]]) -; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: ret i1 [[R]] ; %adjusted = sub i8 %base, %offset diff --git a/llvm/test/Transforms/InstCombine/strict-sub-underflow-check-to-comparison-of-sub-operands.ll b/llvm/test/Transforms/InstCombine/strict-sub-underflow-check-to-comparison-of-sub-operands.ll index 28e02c6..d55d741 100644 --- a/llvm/test/Transforms/InstCombine/strict-sub-underflow-check-to-comparison-of-sub-operands.ll +++ b/llvm/test/Transforms/InstCombine/strict-sub-underflow-check-to-comparison-of-sub-operands.ll @@ -11,7 +11,7 @@ define i1 @t0(i8 %base, i8 %offset) { ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: [[RES:%.*]] = icmp uge i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[RES:%.*]] = icmp ule i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: ret i1 [[RES]] ; %cmp = icmp slt i8 %offset, 0 @@ -28,7 +28,7 @@ define i1 @t1(i8 %base, i8 %offset) { ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: [[ADJUSTED:%.*]] = sub i8 [[BASE:%.*]], [[OFFSET]] ; CHECK-NEXT: call void @use8(i8 [[ADJUSTED]]) -; CHECK-NEXT: [[RES:%.*]] = icmp ult i8 [[BASE]], [[OFFSET]] +; CHECK-NEXT: [[RES:%.*]] = icmp ugt i8 [[OFFSET]], [[BASE]] ; CHECK-NEXT: ret i1 [[RES]] ; %cmp = icmp slt i8 %offset, 0 diff --git a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll index b075673..0379f82 100644 --- a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll +++ b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll @@ -195,7 +195,7 @@ define i32 @sub_ashr_or_i32_extra_use_sub(i32 %x, i32 %y, ptr %p) { ; CHECK-LABEL: @sub_ashr_or_i32_extra_use_sub( ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: store i32 
[[SUB]], ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[Y]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X]], [[Y]] ; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]] ; CHECK-NEXT: ret i32 [[OR]] ; diff --git a/llvm/test/Transforms/InstCombine/unsigned-sub-lack-of-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-sub-lack-of-overflow-check.ll index 35b3fa3..500d61a 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-sub-lack-of-overflow-check.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-sub-lack-of-overflow-check.ll @@ -46,7 +46,7 @@ define i1 @t3_extrause0(i8 %x, i8 %y) { ; CHECK-LABEL: @t3_extrause0( ; CHECK-NEXT: [[T0:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[Y]], [[X]] ; CHECK-NEXT: ret i1 [[R]] ; %t0 = sub i8 %x, %y diff --git a/llvm/test/Transforms/InstCombine/unsigned-sub-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-sub-overflow-check.ll index ddab069..5b27302 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-sub-overflow-check.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-sub-overflow-check.ll @@ -46,7 +46,7 @@ define i1 @t3_extrause0(i8 %x, i8 %y) { ; CHECK-LABEL: @t3_extrause0( ; CHECK-NEXT: [[T0:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[Y]], [[X]] ; CHECK-NEXT: ret i1 [[R]] ; %t0 = sub i8 %x, %y -- 2.7.4