From de506632aaf9722b270d4ff29b323da893a92800 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Mon, 16 Jul 2018 12:44:10 +0000
Subject: [PATCH] [X86][AArch64][DAGCombine] Unfold 'check for [no] signed truncation' pattern

Summary:
[[ https://bugs.llvm.org/show_bug.cgi?id=38149 | PR38149 ]]

As discussed in https://reviews.llvm.org/D49179#1158957 and later, the IR for
the 'check for [no] signed truncation' pattern can be improved:
https://rise4fun.com/Alive/gBf
^ That pattern will be produced by the Implicit Integer Truncation sanitizer
(https://reviews.llvm.org/D48958, https://bugs.llvm.org/show_bug.cgi?id=21530)
in the signed case, so it is probably a good idea to improve it.

But the IR-optimal pattern does not lower efficiently, so we want to undo it.
This handles the simple pattern.
There is a second pattern with the predicate and constants inverted.

NOTE: we do not check uses here; we always do the transform.

Reviewers: spatel, craig.topper, RKSimon, javed.absar

Reviewed By: spatel

Subscribers: kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D49266

llvm-svn: 337166
---
 llvm/include/llvm/CodeGen/TargetLowering.h    | 18 ++++
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 78 +++++++++++++++
 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 17 ++++
 llvm/lib/Target/X86/X86ISelLowering.h         | 18 ++++
 .../lack-of-signed-truncation-check.ll        | 34 +++----
 .../AArch64/signed-truncation-check.ll        | 36 ++++---
 .../X86/lack-of-signed-truncation-check.ll    | 97 ++++++++-----------
 .../CodeGen/X86/signed-truncation-check.ll    | 89 ++++++++---------
 8 files changed, 249 insertions(+), 138 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e43f83497a62..d14c6410953d 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -519,6 +519,19 @@ public:
     return false;
   }
 
+  /// Should we transform the IR-optimal check for whether the given truncation
+  /// down into KeptBits would be truncating or not:
+  ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+  /// Into its more traditional form:
+  ///   ((%x << C) a>> C) dstcond %x
+  /// Return true if we should transform.
+  /// Return false if there is no preference.
+  virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
+                                                    unsigned KeptBits) const {
+    // By default, let's assume that no one prefers shifts.
+    return false;
+  }
+
   /// Return true if the target wants to use the optimization that
   /// turns ext(promotableInst1(...(promotableInstN(load)))) into
   /// promotedInst1(...(promotedInstN(ext(load)))).
@@ -3667,6 +3680,11 @@ private:
   SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                ISD::CondCode Cond, DAGCombinerInfo &DCI,
                                const SDLoc &DL) const;
+
+  SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
+                                               SDValue N1, ISD::CondCode Cond,
+                                               DAGCombinerInfo &DCI,
+                                               const SDLoc &DL) const;
 };
 
 /// Given an LLVM IR type and return type attributes, compute the return value
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 76e3cd6b7534..fbe74f27b8fb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1850,6 +1850,80 @@ SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
   return SDValue();
 }
 
+/// There are multiple IR patterns that could be checking whether a certain
+/// truncation of a signed number would be lossy or not. The pattern which is
+/// best at IR level may not lower optimally. Thus, we want to unfold it.
+/// We are looking for the following pattern: (KeptBits is a constant)
+///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+/// KeptBits won't be bitwidth(x); that would have been constant-folded to
+/// true/false. KeptBits also can't be 1; that would have been folded to
+/// %x dstcond 0.
+/// We will unfold it into the natural trunc+sext pattern:
+///   ((%x << C) a>> C) dstcond %x
+/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
+SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
+    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
+    const SDLoc &DL) const {
+  ISD::CondCode NewCond;
+  if (Cond == ISD::CondCode::SETULT)
+    NewCond = ISD::CondCode::SETEQ;
+  else if (Cond == ISD::CondCode::SETUGE)
+    NewCond = ISD::CondCode::SETNE;
+  else
+    return SDValue();
+
+  // We must be comparing with a constant.
+  ConstantSDNode *C1;
+  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
+    return SDValue();
+
+  // N0 should be:  add %x, (1 << (KeptBits-1))
+  if (N0->getOpcode() != ISD::ADD)
+    return SDValue();
+
+  // And we must be 'add'ing a constant.
+  ConstantSDNode *C01;
+  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
+    return SDValue();
+
+  SDValue X = N0->getOperand(0);
+  EVT XVT = X.getValueType();
+
+  // Validate constants ...
+
+  const APInt &I1 = C1->getAPIntValue();
+  const APInt &I01 = C01->getAPIntValue();
+  // Both of them must be power-of-two, and the constant from setcc is bigger.
+  if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2()))
+    return SDValue();
+
+  // They are power-of-two, so which bit is set?
+  const unsigned KeptBits = I1.logBase2();
+  const unsigned KeptBitsMinusOne = I01.logBase2();
+
+  // Magic!
+  if (KeptBits != (KeptBitsMinusOne + 1))
+    return SDValue();
+  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
+
+  // We don't want to do this in every single case.
+  SelectionDAG &DAG = DCI.DAG;
+  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
+          XVT, KeptBits))
+    return SDValue();
+
+  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
+  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
+
+  // Unfold into:  ((%x << C) a>> C) cond %x
+  // Where 'cond' will be either 'eq' or 'ne'.
+  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
+  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
+  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
+  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
+
+  return T2;
+}
+
 /// Try to simplify a setcc built with the specified operands and cc. If it is
 /// unable to simplify it, return a null SDValue.
 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -2246,6 +2320,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                    Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
       }
     }
+
+    if (SDValue V =
+            optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
+      return V;
   }
 
   // These simplifications apply to splat vectors as well.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 8d883c14c2c4..592845640a44 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -457,6 +457,23 @@ public:
     return VT.getSizeInBits() >= 64; // vector 'bic'
   }
 
+  bool shouldTransformSignedTruncationCheck(EVT XVT,
+                                            unsigned KeptBits) const override {
+    // For vectors, we don't have a preference.
+ if (XVT.isVector()) + return false; + + auto VTIsOk = [](EVT VT) -> bool { + return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || + VT == MVT::i64; + }; + + // We are ok with KeptBitsVT being byte/word/dword, what SXT supports. + // XVT will be larger than KeptBitsVT. + MVT KeptBitsVT = MVT::getIntegerVT(KeptBits); + return VTIsOk(XVT) && VTIsOk(KeptBitsVT); + } + bool hasBitPreservingFPLogic(EVT VT) const override { // FIXME: Is this always true? It should be true for vectors at least. return VT == MVT::f32 || VT == MVT::f64; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 37002939eb98..32215b170a8c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -815,6 +815,24 @@ namespace llvm { bool preferShiftsToClearExtremeBits(SDValue Y) const override; + bool + shouldTransformSignedTruncationCheck(EVT XVT, + unsigned KeptBits) const override { + // For vectors, we don't have a preference.. + if (XVT.isVector()) + return false; + + auto VTIsOk = [](EVT VT) -> bool { + return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || + VT == MVT::i64; + }; + + // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports. + // XVT will be larger than KeptBitsVT. + MVT KeptBitsVT = MVT::getIntegerVT(KeptBits); + return VTIsOk(XVT) && VTIsOk(KeptBitsVT); + } + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { return VT.isScalarInteger(); } diff --git a/llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll index 4e2e789d23a9..84b0ba15d0ae 100644 --- a/llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll +++ b/llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll @@ -183,10 +183,10 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i16_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #128 // =128 +; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: and w8, w8, #0xffff -; CHECK-NEXT: cmp w8, #255 // =255 -; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i16 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8 @@ -196,9 +196,8 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #8, lsl #12 // =32768 -; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: cmp w8, #0 // =0 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: cmp w8, w0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i32 %x, 32768 ; 1U << (16-1) @@ -209,9 +208,9 @@ define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { define i1 @add_ugecmp_i32_i8(i32 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i32_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #128 // =128 -; CHECK-NEXT: cmp w8, #255 // =255 -; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i32 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i32 %tmp0, 256 ; 1U << 8 @@ -221,10 +220,8 @@ define i1 @add_ugecmp_i32_i8(i32 %x) nounwind { define i1 @add_ugecmp_i64_i32(i64 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i64_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, wzr, #0x80000000 -; CHECK-NEXT: add x8, x0, x8 -; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: cmp x8, x0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 
= add i64 %x, 2147483648 ; 1U << (32-1) @@ -235,9 +232,8 @@ define i1 @add_ugecmp_i64_i32(i64 %x) nounwind { define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i64_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #8, lsl #12 // =32768 -; CHECK-NEXT: lsr x8, x8, #16 -; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: sxth x8, w0 +; CHECK-NEXT: cmp x8, x0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i64 %x, 32768 ; 1U << (16-1) @@ -248,9 +244,9 @@ define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i64_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #128 // =128 -; CHECK-NEXT: cmp x8, #255 // =255 -; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: sxtb x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i64 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i64 %tmp0, 256 ; 1U << 8 diff --git a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll index 1c26d1ae8407..7c8627580da8 100644 --- a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll +++ b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll @@ -185,10 +185,10 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i16_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #128 // =128 +; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: and w8, w8, #0xffff -; CHECK-NEXT: cmp w8, #256 // =256 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i16 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8 @@ -198,9 +198,9 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #8, lsl #12 // =32768 -; CHECK-NEXT: cmp w8, #16, lsl #12 // =65536 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i32 %x, 32768 ; 1U << (16-1) %tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16 @@ -210,9 +210,9 @@ define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { define i1 @add_ultcmp_i32_i8(i32 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i32_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #128 // =128 -; CHECK-NEXT: cmp w8, #256 // =256 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i32 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8 @@ -222,10 +222,8 @@ define i1 @add_ultcmp_i32_i8(i32 %x) nounwind { define i1 @add_ultcmp_i64_i32(i64 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i64_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, wzr, #0x80000000 -; CHECK-NEXT: add x8, x0, x8 -; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: cmp x8, x0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1) @@ -236,9 +234,9 @@ define i1 @add_ultcmp_i64_i32(i64 %x) nounwind { define i1 @add_ultcmp_i64_i16(i64 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i64_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #8, lsl #12 // =32768 -; CHECK-NEXT: cmp x8, #16, lsl #12 // =65536 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: sxth x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i64 %x, 32768 ; 1U << (16-1) %tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16 @@ -248,9 +246,9 
@@ define i1 @add_ultcmp_i64_i16(i64 %x) nounwind { define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i64_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #128 // =128 -; CHECK-NEXT: cmp x8, #256 // =256 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: sxtb x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i64 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8 diff --git a/llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll b/llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll index cc876f3fe202..397b7f98f1d5 100644 --- a/llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll +++ b/llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll @@ -295,19 +295,17 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { ; X86-LABEL: add_ugecmp_i16_i8: ; X86: # %bb.0: -; X86-NEXT: movl $128, %eax -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: cmpl $255, %eax -; X86-NEXT: seta %al +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: cmpw %ax, %cx +; X86-NEXT: setne %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_i16_i8: ; X64: # %bb.0: -; X64-NEXT: subl $-128, %edi -; X64-NEXT: movzwl %di, %eax -; X64-NEXT: cmpl $255, %eax -; X64-NEXT: seta %al +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: cmpw %di, %ax +; X64-NEXT: setne %al ; X64-NEXT: retq %tmp0 = add i16 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8 @@ -317,17 +315,17 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { ; X86-LABEL: add_ugecmp_i32_i16: ; X86: # %bb.0: -; X86-NEXT: movl $32768, %eax # imm = 0x8000 -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-NEXT: seta %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movswl %ax, %ecx +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: setne %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_i32_i16: ; X64: # %bb.0: -; X64-NEXT: addl $32768, %edi # imm = 0x8000 -; X64-NEXT: cmpl $65535, %edi # imm = 0xFFFF -; X64-NEXT: seta %al +; X64-NEXT: movswl %di, %eax +; X64-NEXT: cmpl %edi, %eax +; X64-NEXT: setne %al ; X64-NEXT: retq %tmp0 = add i32 %x, 32768 ; 1U << (16-1) %tmp1 = icmp uge i32 %tmp0, 65536 ; 1U << 16 @@ -337,17 +335,17 @@ define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { define i1 @add_ugecmp_i32_i8(i32 %x) nounwind { ; X86-LABEL: add_ugecmp_i32_i8: ; X86: # %bb.0: -; X86-NEXT: movl $128, %eax -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $255, %eax -; X86-NEXT: seta %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: setne %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_i32_i8: ; X64: # %bb.0: -; X64-NEXT: subl $-128, %edi -; X64-NEXT: cmpl $255, %edi -; X64-NEXT: seta %al +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: cmpl %edi, %eax +; X64-NEXT: setne %al ; X64-NEXT: retq %tmp0 = add i32 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i32 %tmp0, 256 ; 1U << 8 @@ -358,16 +356,15 @@ define i1 @add_ugecmp_i64_i32(i64 %x) nounwind { ; X86-LABEL: add_ugecmp_i64_i32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax ; X86-NEXT: setne %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_i64_i32: ; X64: # %bb.0: -; X64-NEXT: subq $-2147483648, 
%rdi # imm = 0x80000000 -; X64-NEXT: shrq $32, %rdi +; X64-NEXT: movslq %edi, %rax +; X64-NEXT: cmpq %rdi, %rax ; X64-NEXT: setne %al ; X64-NEXT: retq %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1) @@ -378,24 +375,20 @@ define i1 @add_ugecmp_i64_i32(i64 %x) nounwind { define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { ; X86-LABEL: add_ugecmp_i64_i16: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $32768, %ecx # imm = 0x8000 -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: movl $65535, %esi # imm = 0xFFFF -; X86-NEXT: cmpl %ecx, %esi -; X86-NEXT: sbbl %eax, %edx -; X86-NEXT: setb %al -; X86-NEXT: popl %esi +; X86-NEXT: movswl %ax, %ecx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: setne %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_i64_i16: ; X64: # %bb.0: -; X64-NEXT: addq $32768, %rdi # imm = 0x8000 -; X64-NEXT: cmpq $65535, %rdi # imm = 0xFFFF -; X64-NEXT: seta %al +; X64-NEXT: movswq %di, %rax +; X64-NEXT: cmpq %rdi, %rax +; X64-NEXT: setne %al ; X64-NEXT: retq %tmp0 = add i64 %x, 32768 ; 1U << (16-1) %tmp1 = icmp uge i64 %tmp0, 65536 ; 1U << 16 @@ -405,24 +398,20 @@ define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { ; X86-LABEL: add_ugecmp_i64_i8: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $128, %ecx -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: movl $255, %esi -; X86-NEXT: cmpl %ecx, %esi -; X86-NEXT: sbbl %eax, %edx -; X86-NEXT: setb %al -; X86-NEXT: popl %esi +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: setne %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_i64_i8: ; X64: # %bb.0: -; X64-NEXT: subq $-128, %rdi -; X64-NEXT: cmpq $255, %rdi -; X64-NEXT: seta %al +; X64-NEXT: movsbq %dil, %rax +; X64-NEXT: cmpq %rdi, %rax +; X64-NEXT: setne %al ; X64-NEXT: retq %tmp0 = add i64 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i64 %tmp0, 256 ; 1U << 8 diff --git a/llvm/test/CodeGen/X86/signed-truncation-check.ll b/llvm/test/CodeGen/X86/signed-truncation-check.ll index 91cdef2a75bb..f18307fbbc74 100644 --- a/llvm/test/CodeGen/X86/signed-truncation-check.ll +++ b/llvm/test/CodeGen/X86/signed-truncation-check.ll @@ -299,19 +299,17 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { ; X86-LABEL: add_ultcmp_i16_i8: ; X86: # %bb.0: -; X86-NEXT: movl $128, %eax -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: cmpl $256, %eax # imm = 0x100 -; X86-NEXT: setb %al +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: cmpw %ax, %cx +; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_i16_i8: ; X64: # %bb.0: -; X64-NEXT: subl $-128, %edi -; X64-NEXT: movzwl %di, %eax -; X64-NEXT: cmpl $256, %eax # imm = 0x100 -; X64-NEXT: setb %al +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: cmpw %di, %ax +; X64-NEXT: sete %al ; X64-NEXT: retq %tmp0 = add i16 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8 @@ -321,17 +319,17 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { ; X86-LABEL: add_ultcmp_i32_i16: ; X86: # %bb.0: -; X86-NEXT: movl $32768, %eax # imm 
= 0x8000 -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $65536, %eax # imm = 0x10000 -; X86-NEXT: setb %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movswl %ax, %ecx +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_i32_i16: ; X64: # %bb.0: -; X64-NEXT: addl $32768, %edi # imm = 0x8000 -; X64-NEXT: cmpl $65536, %edi # imm = 0x10000 -; X64-NEXT: setb %al +; X64-NEXT: movswl %di, %eax +; X64-NEXT: cmpl %edi, %eax +; X64-NEXT: sete %al ; X64-NEXT: retq %tmp0 = add i32 %x, 32768 ; 1U << (16-1) %tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16 @@ -341,17 +339,17 @@ define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { define i1 @add_ultcmp_i32_i8(i32 %x) nounwind { ; X86-LABEL: add_ultcmp_i32_i8: ; X86: # %bb.0: -; X86-NEXT: movl $128, %eax -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $256, %eax # imm = 0x100 -; X86-NEXT: setb %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_i32_i8: ; X64: # %bb.0: -; X64-NEXT: subl $-128, %edi -; X64-NEXT: cmpl $256, %edi # imm = 0x100 -; X64-NEXT: setb %al +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: cmpl %edi, %eax +; X64-NEXT: sete %al ; X64-NEXT: retq %tmp0 = add i32 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8 @@ -362,16 +360,15 @@ define i1 @add_ultcmp_i64_i32(i64 %x) nounwind { ; X86-LABEL: add_ultcmp_i64_i32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax ; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_i64_i32: ; X64: # %bb.0: -; X64-NEXT: subq $-2147483648, %rdi # imm = 0x80000000 -; X64-NEXT: shrq $32, %rdi +; X64-NEXT: movslq %edi, %rax +; X64-NEXT: cmpq %rdi, %rax ; X64-NEXT: sete %al ; X64-NEXT: retq %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1) @@ -383,19 +380,19 @@ define i1 @add_ultcmp_i64_i16(i64 %x) nounwind { ; X86-LABEL: add_ultcmp_i64_i16: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $32768, %ecx # imm = 0x8000 -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %eax -; X86-NEXT: cmpl $65536, %ecx # imm = 0x10000 -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: setb %al +; X86-NEXT: movswl %ax, %ecx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_i64_i16: ; X64: # %bb.0: -; X64-NEXT: addq $32768, %rdi # imm = 0x8000 -; X64-NEXT: cmpq $65536, %rdi # imm = 0x10000 -; X64-NEXT: setb %al +; X64-NEXT: movswq %di, %rax +; X64-NEXT: cmpq %rdi, %rax +; X64-NEXT: sete %al ; X64-NEXT: retq %tmp0 = add i64 %x, 32768 ; 1U << (16-1) %tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16 @@ -406,19 +403,19 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { ; X86-LABEL: add_ultcmp_i64_i8: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $128, %ecx -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %eax -; X86-NEXT: cmpl $256, %ecx # imm = 0x100 -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: setb %al +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_i64_i8: ; X64: # %bb.0: -; 
X64-NEXT: subq $-128, %rdi -; X64-NEXT: cmpq $256, %rdi # imm = 0x100 -; X64-NEXT: setb %al +; X64-NEXT: movsbq %dil, %rax +; X64-NEXT: cmpq %rdi, %rax +; X64-NEXT: sete %al ; X64-NEXT: retq %tmp0 = add i64 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8 -- 2.34.1
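As a concrete illustration of the equivalence this combine relies on, here is a
minimal standalone sketch in LLVM IR (not part of the patch; the function names
are made up for the example). The first function is the IR-optimal add+icmp form
of the i32 -> i8 "signed truncation would be lossy" check, as produced by the
sanitizer; the second is the shl+ashr+icmp form that the new
optimizeSetCCOfSignedTruncationCheck unfolds the setcc into when the target opts
in via shouldTransformSignedTruncationCheck:

define i1 @i32_to_i8_truncation_is_lossy_add_form(i32 %x) {
  %t0 = add i32 %x, 128        ; 1 << (8 - 1)
  %t1 = icmp uge i32 %t0, 256  ; 1 << 8
  ret i1 %t1
}

define i1 @i32_to_i8_truncation_is_lossy_shift_form(i32 %x) {
  %t0 = shl i32 %x, 24         ; C = bitwidth(x) - KeptBits = 32 - 8
  %t1 = ashr i32 %t0, 24       ; (%x << C) a>> C == sext(trunc %x to i8)
  %t2 = icmp ne i32 %t1, %x
  ret i1 %t2
}

Both functions return true exactly when %x does not fit in i8 as a signed value,
i.e. when %x is outside [-128, 127]; see the Alive link in the summary for the
general proof. The ult/eq variant of the predicate works the same way with the
condition codes swapped.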