From de506632aaf9722b270d4ff29b323da893a92800 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Mon, 16 Jul 2018 12:44:10 +0000
Subject: [PATCH] [X86][AArch64][DAGCombine] Unfold 'check for [no] signed truncation' pattern

Summary:
[[ https://bugs.llvm.org/show_bug.cgi?id=38149 | PR38149 ]]

As discussed in https://reviews.llvm.org/D49179#1158957 and later, the IR for
the 'check for [no] signed truncation' pattern can be improved:
https://rise4fun.com/Alive/gBf
^ That pattern will be produced by the Implicit Integer Truncation sanitizer
(https://reviews.llvm.org/D48958, https://bugs.llvm.org/show_bug.cgi?id=21530)
in the signed case, so it is probably a good idea to improve it.

But the IR-optimal pattern does not lower efficiently, so we want to undo it.
This handles the simple pattern.
There is a second pattern with the predicate and constants inverted.

NOTE: we do not check uses here; we always do the transform.

Reviewers: spatel, craig.topper, RKSimon, javed.absar

Reviewed By: spatel

Subscribers: kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D49266

llvm-svn: 337166
---
 llvm/include/llvm/CodeGen/TargetLowering.h    | 18 ++++
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 78 +++++++++++++++
 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 17 ++++
 llvm/lib/Target/X86/X86ISelLowering.h         | 18 ++++
 .../lack-of-signed-truncation-check.ll        | 34 +++----
 .../AArch64/signed-truncation-check.ll        | 36 ++++---
 .../X86/lack-of-signed-truncation-check.ll    | 97 ++++++++-----------
 .../CodeGen/X86/signed-truncation-check.ll    | 89 ++++++++---------
 8 files changed, 249 insertions(+), 138 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e43f83497a62..d14c6410953d 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -519,6 +519,19 @@ public:
     return false;
   }
 
+  /// Should we transform the IR-optimal check for whether the given truncation
+  /// down into KeptBits would be truncating or not:
+  ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+  /// Into its more traditional form:
+  ///   ((%x << C) a>> C) dstcond %x
+  /// Return true if we should transform.
+  /// Return false if there is no preference.
+  virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
+                                                    unsigned KeptBits) const {
+    // By default, let's assume that no one prefers shifts.
+    return false;
+  }
+
   /// Return true if the target wants to use the optimization that
   /// turns ext(promotableInst1(...(promotableInstN(load)))) into
   /// promotedInst1(...(promotedInstN(ext(load)))).
@@ -3667,6 +3680,11 @@ private:
   SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                ISD::CondCode Cond, DAGCombinerInfo &DCI,
                                const SDLoc &DL) const;
+
+  SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
+                                               SDValue N1, ISD::CondCode Cond,
+                                               DAGCombinerInfo &DCI,
+                                               const SDLoc &DL) const;
 };
 
 /// Given an LLVM IR type and return type attributes, compute the return value
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 76e3cd6b7534..fbe74f27b8fb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1850,6 +1850,80 @@ SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
   return SDValue();
 }
 
+/// There are multiple IR patterns that could be checking whether a certain
+/// truncation of a signed number would be lossy or not. The pattern which is
+/// best at IR level may not lower optimally. Thus, we want to unfold it.
+/// We are looking for the following pattern: (KeptBits is a constant)
+///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+/// KeptBits won't be bitwidth(x); that would have been constant-folded to
+/// true/false. KeptBits also can't be 1; that would have been folded to
+/// %x dstcond 0.
+/// We will unfold it into the natural trunc+sext pattern:
+///   ((%x << C) a>> C) dstcond %x
+/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
+SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
+    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
+    const SDLoc &DL) const {
+  ISD::CondCode NewCond;
+  if (Cond == ISD::CondCode::SETULT)
+    NewCond = ISD::CondCode::SETEQ;
+  else if (Cond == ISD::CondCode::SETUGE)
+    NewCond = ISD::CondCode::SETNE;
+  else
+    return SDValue();
+
+  // We must be comparing with a constant.
+  ConstantSDNode *C1;
+  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
+    return SDValue();
+
+  // N0 should be:  add %x, (1 << (KeptBits-1))
+  if (N0->getOpcode() != ISD::ADD)
+    return SDValue();
+
+  // And we must be 'add'ing a constant.
+  ConstantSDNode *C01;
+  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
+    return SDValue();
+
+  SDValue X = N0->getOperand(0);
+  EVT XVT = X.getValueType();
+
+  // Validate constants ...
+
+  const APInt &I1 = C1->getAPIntValue();
+  const APInt &I01 = C01->getAPIntValue();
+  // Both of them must be power-of-two, and the constant from setcc is bigger.
+  if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2()))
+    return SDValue();
+
+  // They are power-of-two, so which bit is set?
+  const unsigned KeptBits = I1.logBase2();
+  const unsigned KeptBitsMinusOne = I01.logBase2();
+
+  // Magic!
+  if (KeptBits != (KeptBitsMinusOne + 1))
+    return SDValue();
+  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
+
+  // We don't want to do this in every single case.
+  SelectionDAG &DAG = DCI.DAG;
+  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
+          XVT, KeptBits))
+    return SDValue();
+
+  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
+  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
+
+  // Unfold into:  ((%x << C) a>> C) cond %x
+  // Where 'cond' will be either 'eq' or 'ne'.
+  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
+  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
+  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
+  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
+
+  return T2;
+}
+
 /// Try to simplify a setcc built with the specified operands and cc. If it is
 /// unable to simplify it, return a null SDValue.
 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -2246,6 +2320,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                    Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
       }
     }
+
+    if (SDValue V =
+            optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
+      return V;
   }
 
   // These simplifications apply to splat vectors as well.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 8d883c14c2c4..592845640a44 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -457,6 +457,23 @@ public:
     return VT.getSizeInBits() >= 64; // vector 'bic'
   }
 
+  bool shouldTransformSignedTruncationCheck(EVT XVT,
+                                            unsigned KeptBits) const override {
+    // For vectors, we don't have a preference.
+ if (XVT.isVector()) + return false; + + auto VTIsOk = [](EVT VT) -> bool { + return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || + VT == MVT::i64; + }; + + // We are ok with KeptBitsVT being byte/word/dword, what SXT supports. + // XVT will be larger than KeptBitsVT. + MVT KeptBitsVT = MVT::getIntegerVT(KeptBits); + return VTIsOk(XVT) && VTIsOk(KeptBitsVT); + } + bool hasBitPreservingFPLogic(EVT VT) const override { // FIXME: Is this always true? It should be true for vectors at least. return VT == MVT::f32 || VT == MVT::f64; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 37002939eb98..32215b170a8c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -815,6 +815,24 @@ namespace llvm { bool preferShiftsToClearExtremeBits(SDValue Y) const override; + bool + shouldTransformSignedTruncationCheck(EVT XVT, + unsigned KeptBits) const override { + // For vectors, we don't have a preference.. + if (XVT.isVector()) + return false; + + auto VTIsOk = [](EVT VT) -> bool { + return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || + VT == MVT::i64; + }; + + // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports. + // XVT will be larger than KeptBitsVT. + MVT KeptBitsVT = MVT::getIntegerVT(KeptBits); + return VTIsOk(XVT) && VTIsOk(KeptBitsVT); + } + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { return VT.isScalarInteger(); } diff --git a/llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll index 4e2e789d23a9..84b0ba15d0ae 100644 --- a/llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll +++ b/llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll @@ -183,10 +183,10 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i16_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #128 // =128 +; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: and w8, w8, #0xffff -; CHECK-NEXT: cmp w8, #255 // =255 -; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i16 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8 @@ -196,9 +196,8 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #8, lsl #12 // =32768 -; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: cmp w8, #0 // =0 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: cmp w8, w0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i32 %x, 32768 ; 1U << (16-1) @@ -209,9 +208,9 @@ define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { define i1 @add_ugecmp_i32_i8(i32 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i32_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #128 // =128 -; CHECK-NEXT: cmp w8, #255 // =255 -; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i32 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i32 %tmp0, 256 ; 1U << 8 @@ -221,10 +220,8 @@ define i1 @add_ugecmp_i32_i8(i32 %x) nounwind { define i1 @add_ugecmp_i64_i32(i64 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i64_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, wzr, #0x80000000 -; CHECK-NEXT: add x8, x0, x8 -; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: cmp x8, x0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 
= add i64 %x, 2147483648 ; 1U << (32-1) @@ -235,9 +232,8 @@ define i1 @add_ugecmp_i64_i32(i64 %x) nounwind { define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i64_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #8, lsl #12 // =32768 -; CHECK-NEXT: lsr x8, x8, #16 -; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: sxth x8, w0 +; CHECK-NEXT: cmp x8, x0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i64 %x, 32768 ; 1U << (16-1) @@ -248,9 +244,9 @@ define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { ; CHECK-LABEL: add_ugecmp_i64_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #128 // =128 -; CHECK-NEXT: cmp x8, #255 // =255 -; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: sxtb x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %tmp0 = add i64 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i64 %tmp0, 256 ; 1U << 8 diff --git a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll index 1c26d1ae8407..7c8627580da8 100644 --- a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll +++ b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll @@ -185,10 +185,10 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i16_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #128 // =128 +; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: and w8, w8, #0xffff -; CHECK-NEXT: cmp w8, #256 // =256 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i16 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8 @@ -198,9 +198,9 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #8, lsl #12 // =32768 -; CHECK-NEXT: cmp w8, #16, lsl #12 // =65536 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i32 %x, 32768 ; 1U << (16-1) %tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16 @@ -210,9 +210,9 @@ define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { define i1 @add_ultcmp_i32_i8(i32 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i32_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, #128 // =128 -; CHECK-NEXT: cmp w8, #256 // =256 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: cmp w8, w0 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i32 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8 @@ -222,10 +222,8 @@ define i1 @add_ultcmp_i32_i8(i32 %x) nounwind { define i1 @add_ultcmp_i64_i32(i64 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i64_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, wzr, #0x80000000 -; CHECK-NEXT: add x8, x0, x8 -; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: cmp x8, x0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1) @@ -236,9 +234,9 @@ define i1 @add_ultcmp_i64_i32(i64 %x) nounwind { define i1 @add_ultcmp_i64_i16(i64 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i64_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #8, lsl #12 // =32768 -; CHECK-NEXT: cmp x8, #16, lsl #12 // =65536 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: sxth x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i64 %x, 32768 ; 1U << (16-1) %tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16 @@ -248,9 +246,9 
@@ define i1 @add_ultcmp_i64_i16(i64 %x) nounwind { define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { ; CHECK-LABEL: add_ultcmp_i64_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #128 // =128 -; CHECK-NEXT: cmp x8, #256 // =256 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: sxtb x8, w0 +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %tmp0 = add i64 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8 diff --git a/llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll b/llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll index cc876f3fe202..397b7f98f1d5 100644 --- a/llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll +++ b/llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll @@ -295,19 +295,17 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { ; X86-LABEL: add_ugecmp_i16_i8: ; X86: # %bb.0: -; X86-NEXT: movl $128, %eax -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: cmpl $255, %eax -; X86-NEXT: seta %al +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: cmpw %ax, %cx +; X86-NEXT: setne %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_i16_i8: ; X64: # %bb.0: -; X64-NEXT: subl $-128, %edi -; X64-NEXT: movzwl %di, %eax -; X64-NEXT: cmpl $255, %eax -; X64-NEXT: seta %al +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: cmpw %di, %ax +; X64-NEXT: setne %al ; X64-NEXT: retq %tmp0 = add i16 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8 @@ -317,17 +315,17 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind { define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { ; X86-LABEL: add_ugecmp_i32_i16: ; X86: # %bb.0: -; X86-NEXT: movl $32768, %eax # imm = 0x8000 -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; X86-NEXT: seta %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movswl %ax, %ecx +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: setne %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_i32_i16: ; X64: # %bb.0: -; X64-NEXT: addl $32768, %edi # imm = 0x8000 -; X64-NEXT: cmpl $65535, %edi # imm = 0xFFFF -; X64-NEXT: seta %al +; X64-NEXT: movswl %di, %eax +; X64-NEXT: cmpl %edi, %eax +; X64-NEXT: setne %al ; X64-NEXT: retq %tmp0 = add i32 %x, 32768 ; 1U << (16-1) %tmp1 = icmp uge i32 %tmp0, 65536 ; 1U << 16 @@ -337,17 +335,17 @@ define i1 @add_ugecmp_i32_i16(i32 %x) nounwind { define i1 @add_ugecmp_i32_i8(i32 %x) nounwind { ; X86-LABEL: add_ugecmp_i32_i8: ; X86: # %bb.0: -; X86-NEXT: movl $128, %eax -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $255, %eax -; X86-NEXT: seta %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: setne %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_i32_i8: ; X64: # %bb.0: -; X64-NEXT: subl $-128, %edi -; X64-NEXT: cmpl $255, %edi -; X64-NEXT: seta %al +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: cmpl %edi, %eax +; X64-NEXT: setne %al ; X64-NEXT: retq %tmp0 = add i32 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i32 %tmp0, 256 ; 1U << 8 @@ -358,16 +356,15 @@ define i1 @add_ugecmp_i64_i32(i64 %x) nounwind { ; X86-LABEL: add_ugecmp_i64_i32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax ; X86-NEXT: setne %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_i64_i32: ; X64: # %bb.0: -; X64-NEXT: subq $-2147483648, 
%rdi # imm = 0x80000000 -; X64-NEXT: shrq $32, %rdi +; X64-NEXT: movslq %edi, %rax +; X64-NEXT: cmpq %rdi, %rax ; X64-NEXT: setne %al ; X64-NEXT: retq %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1) @@ -378,24 +375,20 @@ define i1 @add_ugecmp_i64_i32(i64 %x) nounwind { define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { ; X86-LABEL: add_ugecmp_i64_i16: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $32768, %ecx # imm = 0x8000 -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: movl $65535, %esi # imm = 0xFFFF -; X86-NEXT: cmpl %ecx, %esi -; X86-NEXT: sbbl %eax, %edx -; X86-NEXT: setb %al -; X86-NEXT: popl %esi +; X86-NEXT: movswl %ax, %ecx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: setne %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_i64_i16: ; X64: # %bb.0: -; X64-NEXT: addq $32768, %rdi # imm = 0x8000 -; X64-NEXT: cmpq $65535, %rdi # imm = 0xFFFF -; X64-NEXT: seta %al +; X64-NEXT: movswq %di, %rax +; X64-NEXT: cmpq %rdi, %rax +; X64-NEXT: setne %al ; X64-NEXT: retq %tmp0 = add i64 %x, 32768 ; 1U << (16-1) %tmp1 = icmp uge i64 %tmp0, 65536 ; 1U << 16 @@ -405,24 +398,20 @@ define i1 @add_ugecmp_i64_i16(i64 %x) nounwind { define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { ; X86-LABEL: add_ugecmp_i64_i8: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $128, %ecx -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: movl $255, %esi -; X86-NEXT: cmpl %ecx, %esi -; X86-NEXT: sbbl %eax, %edx -; X86-NEXT: setb %al -; X86-NEXT: popl %esi +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: setne %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_i64_i8: ; X64: # %bb.0: -; X64-NEXT: subq $-128, %rdi -; X64-NEXT: cmpq $255, %rdi -; X64-NEXT: seta %al +; X64-NEXT: movsbq %dil, %rax +; X64-NEXT: cmpq %rdi, %rax +; X64-NEXT: setne %al ; X64-NEXT: retq %tmp0 = add i64 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i64 %tmp0, 256 ; 1U << 8 diff --git a/llvm/test/CodeGen/X86/signed-truncation-check.ll b/llvm/test/CodeGen/X86/signed-truncation-check.ll index 91cdef2a75bb..f18307fbbc74 100644 --- a/llvm/test/CodeGen/X86/signed-truncation-check.ll +++ b/llvm/test/CodeGen/X86/signed-truncation-check.ll @@ -299,19 +299,17 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind { define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { ; X86-LABEL: add_ultcmp_i16_i8: ; X86: # %bb.0: -; X86-NEXT: movl $128, %eax -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: cmpl $256, %eax # imm = 0x100 -; X86-NEXT: setb %al +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: cmpw %ax, %cx +; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_i16_i8: ; X64: # %bb.0: -; X64-NEXT: subl $-128, %edi -; X64-NEXT: movzwl %di, %eax -; X64-NEXT: cmpl $256, %eax # imm = 0x100 -; X64-NEXT: setb %al +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: cmpw %di, %ax +; X64-NEXT: sete %al ; X64-NEXT: retq %tmp0 = add i16 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8 @@ -321,17 +319,17 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind { define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { ; X86-LABEL: add_ultcmp_i32_i16: ; X86: # %bb.0: -; X86-NEXT: movl $32768, %eax # imm 
= 0x8000 -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $65536, %eax # imm = 0x10000 -; X86-NEXT: setb %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movswl %ax, %ecx +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_i32_i16: ; X64: # %bb.0: -; X64-NEXT: addl $32768, %edi # imm = 0x8000 -; X64-NEXT: cmpl $65536, %edi # imm = 0x10000 -; X64-NEXT: setb %al +; X64-NEXT: movswl %di, %eax +; X64-NEXT: cmpl %edi, %eax +; X64-NEXT: sete %al ; X64-NEXT: retq %tmp0 = add i32 %x, 32768 ; 1U << (16-1) %tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16 @@ -341,17 +339,17 @@ define i1 @add_ultcmp_i32_i16(i32 %x) nounwind { define i1 @add_ultcmp_i32_i8(i32 %x) nounwind { ; X86-LABEL: add_ultcmp_i32_i8: ; X86: # %bb.0: -; X86-NEXT: movl $128, %eax -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $256, %eax # imm = 0x100 -; X86-NEXT: setb %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_i32_i8: ; X64: # %bb.0: -; X64-NEXT: subl $-128, %edi -; X64-NEXT: cmpl $256, %edi # imm = 0x100 -; X64-NEXT: setb %al +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: cmpl %edi, %eax +; X64-NEXT: sete %al ; X64-NEXT: retq %tmp0 = add i32 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8 @@ -362,16 +360,15 @@ define i1 @add_ultcmp_i64_i32(i64 %x) nounwind { ; X86-LABEL: add_ultcmp_i64_i32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax ; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_i64_i32: ; X64: # %bb.0: -; X64-NEXT: subq $-2147483648, %rdi # imm = 0x80000000 -; X64-NEXT: shrq $32, %rdi +; X64-NEXT: movslq %edi, %rax +; X64-NEXT: cmpq %rdi, %rax ; X64-NEXT: sete %al ; X64-NEXT: retq %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1) @@ -383,19 +380,19 @@ define i1 @add_ultcmp_i64_i16(i64 %x) nounwind { ; X86-LABEL: add_ultcmp_i64_i16: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $32768, %ecx # imm = 0x8000 -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %eax -; X86-NEXT: cmpl $65536, %ecx # imm = 0x10000 -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: setb %al +; X86-NEXT: movswl %ax, %ecx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_i64_i16: ; X64: # %bb.0: -; X64-NEXT: addq $32768, %rdi # imm = 0x8000 -; X64-NEXT: cmpq $65536, %rdi # imm = 0x10000 -; X64-NEXT: setb %al +; X64-NEXT: movswq %di, %rax +; X64-NEXT: cmpq %rdi, %rax +; X64-NEXT: sete %al ; X64-NEXT: retq %tmp0 = add i64 %x, 32768 ; 1U << (16-1) %tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16 @@ -406,19 +403,19 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind { ; X86-LABEL: add_ultcmp_i64_i8: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $128, %ecx -; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: adcl $0, %eax -; X86-NEXT: cmpl $256, %ecx # imm = 0x100 -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: setb %al +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_i64_i8: ; X64: # %bb.0: -; 
X64-NEXT: subq $-128, %rdi -; X64-NEXT: cmpq $256, %rdi # imm = 0x100 -; X64-NEXT: setb %al +; X64-NEXT: movsbq %dil, %rax +; X64-NEXT: cmpq %rdi, %rax +; X64-NEXT: sete %al ; X64-NEXT: retq %tmp0 = add i64 %x, 128 ; 1U << (8-1) %tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8 -- 2.34.1
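As a concrete illustration of the equivalence this combine relies on, here is a
minimal standalone sketch in LLVM IR (not part of the patch; the function names
are made up for the example). The first function is the IR-optimal add+icmp form
of the i32 -> i8 "signed truncation would be lossy" check, as produced by the
sanitizer; the second is the shl+ashr+icmp form that the new
optimizeSetCCOfSignedTruncationCheck unfolds the setcc into when the target opts
in via shouldTransformSignedTruncationCheck:

define i1 @i32_to_i8_truncation_is_lossy_add_form(i32 %x) {
  %t0 = add i32 %x, 128        ; 1 << (8 - 1)
  %t1 = icmp uge i32 %t0, 256  ; 1 << 8
  ret i1 %t1
}

define i1 @i32_to_i8_truncation_is_lossy_shift_form(i32 %x) {
  %t0 = shl i32 %x, 24         ; C = bitwidth(x) - KeptBits = 32 - 8
  %t1 = ashr i32 %t0, 24       ; (%x << C) a>> C == sext(trunc %x to i8)
  %t2 = icmp ne i32 %t1, %x
  ret i1 %t2
}

Both functions return true exactly when %x does not fit in i8 as a signed value,
i.e. when %x is outside [-128, 127]; see the Alive link in the summary for the
general proof. The ult/eq variant of the predicate works the same way with the
condition codes swapped.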