return false;
}
+ /// Should we transform the IR-optimal check for whether the given truncation
+ /// down into KeptBits bits would be lossy or not:
+ ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+ /// into its more traditional form:
+ ///   ((%x << C) a>> C) dstcond %x
+ /// Return true if we should transform.
+ /// Return false if there is no preference.
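+ /// For example (an illustrative instance; cf. the updated tests), keeping
+ /// 16 bits of an i32 %x turns
+ ///   (add %x, 32768) ult 65536
+ /// into
+ ///   ((%x << 16) a>> 16) eq %x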
+ virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
+ unsigned KeptBits) const {
+ // By default, let's assume that no one prefers shifts.
+ return false;
+ }
+
/// Return true if the target wants to use the optimization that
/// turns ext(promotableInst1(...(promotableInstN(load)))) into
/// promotedInst1(...(promotedInstN(ext(load)))).
SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, DAGCombinerInfo &DCI,
const SDLoc &DL) const;
+
+ SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
+ SDValue N1, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const;
};
/// Given an LLVM IR type and return type attributes, compute the return value
return SDValue();
}
+/// There are multiple IR patterns that could be checking whether a certain
+/// truncation of a signed number would be lossy or not. The pattern which is
+/// best at the IR level may not lower optimally. Thus, we want to unfold it.
+/// We are looking for the following pattern (KeptBits is a constant):
+///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+/// KeptBits won't be bitwidth(x); that case would have been constant-folded
+/// to true/false. KeptBits also can't be 1; that case would have been folded
+/// to %x dstcond 0. We will unfold it into the natural trunc+sext pattern:
+///   ((%x << C) a>> C) dstcond %x
+/// where C = bitwidth(x) - KeptBits and C u< bitwidth(x).
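+/// For example (an illustrative instance; cf. the updated tests): for an i64
+/// %x with KeptBits == 8, C == 56, and
+///   (add %x, 128) ult 256   unfolds into   ((%x << 56) a>> 56) eq %x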
+SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
+ EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ ISD::CondCode NewCond;
+ if (Cond == ISD::CondCode::SETULT)
+ NewCond = ISD::CondCode::SETEQ;
+ else if (Cond == ISD::CondCode::SETUGE)
+ NewCond = ISD::CondCode::SETNE;
+ else
+ return SDValue();
+
+ // We must be comparing with a constant.
+ ConstantSDNode *C1;
+ if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
+ return SDValue();
+
+ // N0 should be: add %x, (1 << (KeptBits-1))
+ if (N0->getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // And we must be 'add'ing a constant.
+ ConstantSDNode *C01;
+ if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
+ return SDValue();
+
+ SDValue X = N0->getOperand(0);
+ EVT XVT = X.getValueType();
+
+ // Validate constants ...
+
+ const APInt &I1 = C1->getAPIntValue();
+ const APInt &I01 = C01->getAPIntValue();
+ // Both of them must be powers of two, and the constant from the setcc must be the bigger one.
+ if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2()))
+ return SDValue();
+
+ // They are powers of two, so which bits are set?
+ const unsigned KeptBits = I1.logBase2();
+ const unsigned KeptBitsMinusOne = I01.logBase2();
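+ // For example (cf. the i32 -> i8 tests): I1 == 256 and I01 == 128 give
+ // KeptBits == 8 and KeptBitsMinusOne == 7.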
+
+ // The 'add' constant must be exactly half of the 'setcc' constant,
+ // i.e. KeptBitsMinusOne must really be one less than KeptBits.
+ if (KeptBits != (KeptBitsMinusOne + 1))
+ return SDValue();
+ assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
+
+ // We don't want to do this in every single case; let the target decide
+ // whether the unfolded form is preferable.
+ SelectionDAG &DAG = DCI.DAG;
+ if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
+ XVT, KeptBits))
+ return SDValue();
+
+ const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
+ assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
+
+ // Unfold into: ((%x << C) a>> C) cond %x
+ // Where 'cond' will be either 'eq' or 'ne'.
+ SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
+ SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
+ SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
+
+ return T2;
+}
+
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
}
+
+ if (SDValue V =
+ optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
+ return V;
}
// These simplifications apply to splat vectors as well.
return VT.getSizeInBits() >= 64; // vector 'bic'
}
+ bool shouldTransformSignedTruncationCheck(EVT XVT,
+ unsigned KeptBits) const override {
+ // For vectors, we don't have a preference.
+ if (XVT.isVector())
+ return false;
+
+ auto VTIsOk = [](EVT VT) -> bool {
+ return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
+ VT == MVT::i64;
+ };
+
+ // We are ok with KeptBitsVT being byte/word/dword, which is what SXT supports.
+ // XVT will be larger than KeptBitsVT.
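+ // For example, an i32 %x kept to 8 bits lowers to sxtb + cmp, as in the
+ // updated tests.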
+ MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
+ return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
+ }
+
bool hasBitPreservingFPLogic(EVT VT) const override {
// FIXME: Is this always true? It should be true for vectors at least.
return VT == MVT::f32 || VT == MVT::f64;
bool preferShiftsToClearExtremeBits(SDValue Y) const override;
+ bool
+ shouldTransformSignedTruncationCheck(EVT XVT,
+ unsigned KeptBits) const override {
+ // For vectors, we don't have a preference.
+ if (XVT.isVector())
+ return false;
+
+ auto VTIsOk = [](EVT VT) -> bool {
+ return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
+ VT == MVT::i64;
+ };
+
+ // We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX supports.
+ // XVT will be larger than KeptBitsVT.
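+ // For example, an i32 %x kept to 8 bits lowers to movsbl + cmpl on x86-64,
+ // as in the updated tests.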
+ MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
+ return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
+ }
+
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i16_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128 // =128
+; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #255 // =255
-; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: cmp w8, w0, uxth
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8
define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #8, lsl #12 // =32768
-; CHECK-NEXT: lsr w8, w8, #16
-; CHECK-NEXT: cmp w8, #0 // =0
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128 // =128
-; CHECK-NEXT: cmp w8, #255 // =255
-; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i32 %tmp0, 256 ; 1U << 8
define i1 @add_ugecmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, wzr, #0x80000000
-; CHECK-NEXT: add x8, x0, x8
-; CHECK-NEXT: lsr x8, x8, #32
-; CHECK-NEXT: cmp x8, #0 // =0
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #8, lsl #12 // =32768
-; CHECK-NEXT: lsr x8, x8, #16
-; CHECK-NEXT: cmp x8, #0 // =0
+; CHECK-NEXT: sxth x8, w0
+; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #128 // =128
-; CHECK-NEXT: cmp x8, #255 // =255
-; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: sxtb x8, w0
+; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i64 %tmp0, 256 ; 1U << 8
define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i16_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128 // =128
+; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #256 // =256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: cmp w8, w0, uxth
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #8, lsl #12 // =32768
-; CHECK-NEXT: cmp w8, #16, lsl #12 // =65536
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16
define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128 // =128
-; CHECK-NEXT: cmp w8, #256 // =256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8
define i1 @add_ultcmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, wzr, #0x80000000
-; CHECK-NEXT: add x8, x0, x8
-; CHECK-NEXT: lsr x8, x8, #32
-; CHECK-NEXT: cmp x8, #0 // =0
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
define i1 @add_ultcmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #8, lsl #12 // =32768
-; CHECK-NEXT: cmp x8, #16, lsl #12 // =65536
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: sxth x8, w0
+; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16
define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #128 // =128
-; CHECK-NEXT: cmp x8, #256 // =256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: sxtb x8, w0
+; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8
define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
; X86-LABEL: add_ugecmp_i16_i8:
; X86: # %bb.0:
-; X86-NEXT: movl $128, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: cmpl $255, %eax
-; X86-NEXT: seta %al
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: cmpw %ax, %cx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i16_i8:
; X64: # %bb.0:
-; X64-NEXT: subl $-128, %edi
-; X64-NEXT: movzwl %di, %eax
-; X64-NEXT: cmpl $255, %eax
-; X64-NEXT: seta %al
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: cmpw %di, %ax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8
define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
; X86-LABEL: add_ugecmp_i32_i16:
; X86: # %bb.0:
-; X86-NEXT: movl $32768, %eax # imm = 0x8000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-NEXT: seta %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movswl %ax, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i32_i16:
; X64: # %bb.0:
-; X64-NEXT: addl $32768, %edi # imm = 0x8000
-; X64-NEXT: cmpl $65535, %edi # imm = 0xFFFF
-; X64-NEXT: seta %al
+; X64-NEXT: movswl %di, %eax
+; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp uge i32 %tmp0, 65536 ; 1U << 16
define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
; X86-LABEL: add_ugecmp_i32_i8:
; X86: # %bb.0:
-; X86-NEXT: movl $128, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $255, %eax
-; X86-NEXT: seta %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i32_i8:
; X64: # %bb.0:
-; X64-NEXT: subl $-128, %edi
-; X64-NEXT: cmpl $255, %edi
-; X64-NEXT: seta %al
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i32 %tmp0, 256 ; 1U << 8
; X86-LABEL: add_ugecmp_i64_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i64_i32:
; X64: # %bb.0:
-; X64-NEXT: subq $-2147483648, %rdi # imm = 0x80000000
-; X64-NEXT: shrq $32, %rdi
+; X64-NEXT: movslq %edi, %rax
+; X64-NEXT: cmpq %rdi, %rax
; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
; X86-LABEL: add_ugecmp_i64_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $32768, %ecx # imm = 0x8000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: movl $65535, %esi # imm = 0xFFFF
-; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: setb %al
-; X86-NEXT: popl %esi
+; X86-NEXT: movswl %ax, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i64_i16:
; X64: # %bb.0:
-; X64-NEXT: addq $32768, %rdi # imm = 0x8000
-; X64-NEXT: cmpq $65535, %rdi # imm = 0xFFFF
-; X64-NEXT: seta %al
+; X64-NEXT: movswq %di, %rax
+; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp uge i64 %tmp0, 65536 ; 1U << 16
define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; X86-LABEL: add_ugecmp_i64_i8:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $128, %ecx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: movl $255, %esi
-; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: setb %al
-; X86-NEXT: popl %esi
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i64_i8:
; X64: # %bb.0:
-; X64-NEXT: subq $-128, %rdi
-; X64-NEXT: cmpq $255, %rdi
-; X64-NEXT: seta %al
+; X64-NEXT: movsbq %dil, %rax
+; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i64 %tmp0, 256 ; 1U << 8
define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
; X86-LABEL: add_ultcmp_i16_i8:
; X86: # %bb.0:
-; X86-NEXT: movl $128, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: cmpl $256, %eax # imm = 0x100
-; X86-NEXT: setb %al
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: cmpw %ax, %cx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i16_i8:
; X64: # %bb.0:
-; X64-NEXT: subl $-128, %edi
-; X64-NEXT: movzwl %di, %eax
-; X64-NEXT: cmpl $256, %eax # imm = 0x100
-; X64-NEXT: setb %al
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: cmpw %di, %ax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
; X86-LABEL: add_ultcmp_i32_i16:
; X86: # %bb.0:
-; X86-NEXT: movl $32768, %eax # imm = 0x8000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $65536, %eax # imm = 0x10000
-; X86-NEXT: setb %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movswl %ax, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i32_i16:
; X64: # %bb.0:
-; X64-NEXT: addl $32768, %edi # imm = 0x8000
-; X64-NEXT: cmpl $65536, %edi # imm = 0x10000
-; X64-NEXT: setb %al
+; X64-NEXT: movswl %di, %eax
+; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16
define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
; X86-LABEL: add_ultcmp_i32_i8:
; X86: # %bb.0:
-; X86-NEXT: movl $128, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $256, %eax # imm = 0x100
-; X86-NEXT: setb %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i32_i8:
; X64: # %bb.0:
-; X64-NEXT: subl $-128, %edi
-; X64-NEXT: cmpl $256, %edi # imm = 0x100
-; X64-NEXT: setb %al
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8
; X86-LABEL: add_ultcmp_i64_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i64_i32:
; X64: # %bb.0:
-; X64-NEXT: subq $-2147483648, %rdi # imm = 0x80000000
-; X64-NEXT: shrq $32, %rdi
+; X64-NEXT: movslq %edi, %rax
+; X64-NEXT: cmpq %rdi, %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
; X86-LABEL: add_ultcmp_i64_i16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $32768, %ecx # imm = 0x8000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: cmpl $65536, %ecx # imm = 0x10000
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: setb %al
+; X86-NEXT: movswl %ax, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i64_i16:
; X64: # %bb.0:
-; X64-NEXT: addq $32768, %rdi # imm = 0x8000
-; X64-NEXT: cmpq $65536, %rdi # imm = 0x10000
-; X64-NEXT: setb %al
+; X64-NEXT: movswq %di, %rax
+; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16
; X86-LABEL: add_ultcmp_i64_i8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $128, %ecx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: cmpl $256, %ecx # imm = 0x100
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: setb %al
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i64_i8:
; X64: # %bb.0:
-; X64-NEXT: subq $-128, %rdi
-; X64-NEXT: cmpq $256, %rdi # imm = 0x100
-; X64-NEXT: setb %al
+; X64-NEXT: movsbq %dil, %rax
+; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8