return false;
}
+ /// Should we transform the IR-optimal check for whether the given truncation
+ /// down into KeptBits bits would be lossy or not:
+ ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+ /// into its more traditional form:
+ ///   ((%x << C) a>> C) dstcond %x
+ /// Return true if we should transform.
+ /// Return false if there is no preference.
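+ /// For example (an illustrative instance; cf. the updated tests), keeping
+ /// 16 bits of an i32 %x turns
+ ///   (add %x, 32768) ult 65536
+ /// into
+ ///   ((%x << 16) a>> 16) eq %x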
+ virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
+ unsigned KeptBits) const {
+ // By default, let's assume that no one prefers shifts.
+ return false;
+ }
+
/// Return true if the target wants to use the optimization that
/// turns ext(promotableInst1(...(promotableInstN(load)))) into
/// promotedInst1(...(promotedInstN(ext(load)))).
SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, DAGCombinerInfo &DCI,
const SDLoc &DL) const;
+
+ SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
+ SDValue N1, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const;
};
/// Given an LLVM IR type and return type attributes, compute the return value
return SDValue();
}
+/// There are multiple IR patterns that could be checking whether a certain
+/// truncation of a signed number would be lossy or not. The pattern which is
+/// best at the IR level may not lower optimally. Thus, we want to unfold it.
+/// We are looking for the following pattern (KeptBits is a constant):
+///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+/// KeptBits won't be bitwidth(x); that case would have been constant-folded
+/// to true/false. KeptBits also can't be 1; that case would have been folded
+/// to %x dstcond 0. We will unfold it into the natural trunc+sext pattern:
+///   ((%x << C) a>> C) dstcond %x
+/// where C = bitwidth(x) - KeptBits and C u< bitwidth(x).
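+/// For example (an illustrative instance; cf. the updated tests): for an i64
+/// %x with KeptBits == 8, C == 56, and
+///   (add %x, 128) ult 256   unfolds into   ((%x << 56) a>> 56) eq %x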
+SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
+ EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ ISD::CondCode NewCond;
+ if (Cond == ISD::CondCode::SETULT)
+ NewCond = ISD::CondCode::SETEQ;
+ else if (Cond == ISD::CondCode::SETUGE)
+ NewCond = ISD::CondCode::SETNE;
+ else
+ return SDValue();
+
+ // We must be comparing with a constant.
+ ConstantSDNode *C1;
+ if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
+ return SDValue();
+
+ // N0 should be: add %x, (1 << (KeptBits-1))
+ if (N0->getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // And we must be 'add'ing a constant.
+ ConstantSDNode *C01;
+ if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
+ return SDValue();
+
+ SDValue X = N0->getOperand(0);
+ EVT XVT = X.getValueType();
+
+ // Validate constants ...
+
+ const APInt &I1 = C1->getAPIntValue();
+ const APInt &I01 = C01->getAPIntValue();
+ // Both of them must be powers of two, and the constant from the setcc must be the bigger one.
+ if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2()))
+ return SDValue();
+
+ // They are powers of two, so which bits are set?
+ const unsigned KeptBits = I1.logBase2();
+ const unsigned KeptBitsMinusOne = I01.logBase2();
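+ // For example (cf. the i32 -> i8 tests): I1 == 256 and I01 == 128 give
+ // KeptBits == 8 and KeptBitsMinusOne == 7.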
+
+ // The 'add' constant must be exactly half of the 'setcc' constant,
+ // i.e. KeptBitsMinusOne must really be one less than KeptBits.
+ if (KeptBits != (KeptBitsMinusOne + 1))
+ return SDValue();
+ assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
+
+ // We don't want to do this in every single case; let the target decide
+ // whether the unfolded form is preferable.
+ SelectionDAG &DAG = DCI.DAG;
+ if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
+ XVT, KeptBits))
+ return SDValue();
+
+ const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
+ assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
+
+ // Unfold into: ((%x << C) a>> C) cond %x
+ // Where 'cond' will be either 'eq' or 'ne'.
+ SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
+ SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
+ SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
+
+ return T2;
+}
+
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
}
+
+ if (SDValue V =
+ optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
+ return V;
}
// These simplifications apply to splat vectors as well.
return VT.getSizeInBits() >= 64; // vector 'bic'
}
+ bool shouldTransformSignedTruncationCheck(EVT XVT,
+ unsigned KeptBits) const override {
+ // For vectors, we don't have a preference.
+ if (XVT.isVector())
+ return false;
+
+ auto VTIsOk = [](EVT VT) -> bool {
+ return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
+ VT == MVT::i64;
+ };
+
+ // We are ok with KeptBitsVT being byte/word/dword, which is what SXT supports.
+ // XVT will be larger than KeptBitsVT.
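+ // For example, an i32 %x kept to 8 bits lowers to sxtb + cmp, as in the
+ // updated tests.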
+ MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
+ return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
+ }
+
bool hasBitPreservingFPLogic(EVT VT) const override {
// FIXME: Is this always true? It should be true for vectors at least.
return VT == MVT::f32 || VT == MVT::f64;
bool preferShiftsToClearExtremeBits(SDValue Y) const override;
+ bool
+ shouldTransformSignedTruncationCheck(EVT XVT,
+ unsigned KeptBits) const override {
+ // For vectors, we don't have a preference.
+ if (XVT.isVector())
+ return false;
+
+ auto VTIsOk = [](EVT VT) -> bool {
+ return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
+ VT == MVT::i64;
+ };
+
+ // We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX supports.
+ // XVT will be larger than KeptBitsVT.
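+ // For example, an i32 %x kept to 8 bits lowers to movsbl + cmpl on x86-64,
+ // as in the updated tests.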
+ MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
+ return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
+ }
+
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i16_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128 // =128
+; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #255 // =255
-; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: cmp w8, w0, uxth
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8
define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #8, lsl #12 // =32768
-; CHECK-NEXT: lsr w8, w8, #16
-; CHECK-NEXT: cmp w8, #0 // =0
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128 // =128
-; CHECK-NEXT: cmp w8, #255 // =255
-; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i32 %tmp0, 256 ; 1U << 8
define i1 @add_ugecmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, wzr, #0x80000000
-; CHECK-NEXT: add x8, x0, x8
-; CHECK-NEXT: lsr x8, x8, #32
-; CHECK-NEXT: cmp x8, #0 // =0
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #8, lsl #12 // =32768
-; CHECK-NEXT: lsr x8, x8, #16
-; CHECK-NEXT: cmp x8, #0 // =0
+; CHECK-NEXT: sxth x8, w0
+; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #128 // =128
-; CHECK-NEXT: cmp x8, #255 // =255
-; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: sxtb x8, w0
+; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i64 %tmp0, 256 ; 1U << 8
define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i16_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128 // =128
+; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: and w8, w8, #0xffff
-; CHECK-NEXT: cmp w8, #256 // =256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: cmp w8, w0, uxth
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #8, lsl #12 // =32768
-; CHECK-NEXT: cmp w8, #16, lsl #12 // =65536
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16
define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #128 // =128
-; CHECK-NEXT: cmp w8, #256 // =256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: cmp w8, w0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8
define i1 @add_ultcmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, wzr, #0x80000000
-; CHECK-NEXT: add x8, x0, x8
-; CHECK-NEXT: lsr x8, x8, #32
-; CHECK-NEXT: cmp x8, #0 // =0
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
define i1 @add_ultcmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #8, lsl #12 // =32768
-; CHECK-NEXT: cmp x8, #16, lsl #12 // =65536
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: sxth x8, w0
+; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16
define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, #128 // =128
-; CHECK-NEXT: cmp x8, #256 // =256
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: sxtb x8, w0
+; CHECK-NEXT: cmp x8, x0
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8
define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
; X86-LABEL: add_ugecmp_i16_i8:
; X86: # %bb.0:
-; X86-NEXT: movl $128, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: cmpl $255, %eax
-; X86-NEXT: seta %al
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: cmpw %ax, %cx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i16_i8:
; X64: # %bb.0:
-; X64-NEXT: subl $-128, %edi
-; X64-NEXT: movzwl %di, %eax
-; X64-NEXT: cmpl $255, %eax
-; X64-NEXT: seta %al
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: cmpw %di, %ax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8
define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
; X86-LABEL: add_ugecmp_i32_i16:
; X86: # %bb.0:
-; X86-NEXT: movl $32768, %eax # imm = 0x8000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $65535, %eax # imm = 0xFFFF
-; X86-NEXT: seta %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movswl %ax, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i32_i16:
; X64: # %bb.0:
-; X64-NEXT: addl $32768, %edi # imm = 0x8000
-; X64-NEXT: cmpl $65535, %edi # imm = 0xFFFF
-; X64-NEXT: seta %al
+; X64-NEXT: movswl %di, %eax
+; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp uge i32 %tmp0, 65536 ; 1U << 16
define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
; X86-LABEL: add_ugecmp_i32_i8:
; X86: # %bb.0:
-; X86-NEXT: movl $128, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $255, %eax
-; X86-NEXT: seta %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i32_i8:
; X64: # %bb.0:
-; X64-NEXT: subl $-128, %edi
-; X64-NEXT: cmpl $255, %edi
-; X64-NEXT: seta %al
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i32 %tmp0, 256 ; 1U << 8
; X86-LABEL: add_ugecmp_i64_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i64_i32:
; X64: # %bb.0:
-; X64-NEXT: subq $-2147483648, %rdi # imm = 0x80000000
-; X64-NEXT: shrq $32, %rdi
+; X64-NEXT: movslq %edi, %rax
+; X64-NEXT: cmpq %rdi, %rax
; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
; X86-LABEL: add_ugecmp_i64_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $32768, %ecx # imm = 0x8000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: movl $65535, %esi # imm = 0xFFFF
-; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: setb %al
-; X86-NEXT: popl %esi
+; X86-NEXT: movswl %ax, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i64_i16:
; X64: # %bb.0:
-; X64-NEXT: addq $32768, %rdi # imm = 0x8000
-; X64-NEXT: cmpq $65535, %rdi # imm = 0xFFFF
-; X64-NEXT: seta %al
+; X64-NEXT: movswq %di, %rax
+; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp uge i64 %tmp0, 65536 ; 1U << 16
define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; X86-LABEL: add_ugecmp_i64_i8:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $128, %ecx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: movl $255, %esi
-; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: setb %al
-; X86-NEXT: popl %esi
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: setne %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_i64_i8:
; X64: # %bb.0:
-; X64-NEXT: subq $-128, %rdi
-; X64-NEXT: cmpq $255, %rdi
-; X64-NEXT: seta %al
+; X64-NEXT: movsbq %dil, %rax
+; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: setne %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp uge i64 %tmp0, 256 ; 1U << 8
define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
; X86-LABEL: add_ultcmp_i16_i8:
; X86: # %bb.0:
-; X86-NEXT: movl $128, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: cmpl $256, %eax # imm = 0x100
-; X86-NEXT: setb %al
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: cmpw %ax, %cx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i16_i8:
; X64: # %bb.0:
-; X64-NEXT: subl $-128, %edi
-; X64-NEXT: movzwl %di, %eax
-; X64-NEXT: cmpl $256, %eax # imm = 0x100
-; X64-NEXT: setb %al
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: cmpw %di, %ax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i16 %tmp0, 256 ; 1U << 8
define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
; X86-LABEL: add_ultcmp_i32_i16:
; X86: # %bb.0:
-; X86-NEXT: movl $32768, %eax # imm = 0x8000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $65536, %eax # imm = 0x10000
-; X86-NEXT: setb %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movswl %ax, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i32_i16:
; X64: # %bb.0:
-; X64-NEXT: addl $32768, %edi # imm = 0x8000
-; X64-NEXT: cmpl $65536, %edi # imm = 0x10000
-; X64-NEXT: setb %al
+; X64-NEXT: movswl %di, %eax
+; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i32 %tmp0, 65536 ; 1U << 16
define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
; X86-LABEL: add_ultcmp_i32_i8:
; X86: # %bb.0:
-; X86-NEXT: movl $128, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $256, %eax # imm = 0x100
-; X86-NEXT: setb %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i32_i8:
; X64: # %bb.0:
-; X64-NEXT: subl $-128, %edi
-; X64-NEXT: cmpl $256, %edi # imm = 0x100
-; X64-NEXT: setb %al
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: cmpl %edi, %eax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i32 %tmp0, 256 ; 1U << 8
; X86-LABEL: add_ultcmp_i64_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i64_i32:
; X64: # %bb.0:
-; X64-NEXT: subq $-2147483648, %rdi # imm = 0x80000000
-; X64-NEXT: shrq $32, %rdi
+; X64-NEXT: movslq %edi, %rax
+; X64-NEXT: cmpq %rdi, %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
; X86-LABEL: add_ultcmp_i64_i16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $32768, %ecx # imm = 0x8000
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: cmpl $65536, %ecx # imm = 0x10000
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: setb %al
+; X86-NEXT: movswl %ax, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i64_i16:
; X64: # %bb.0:
-; X64-NEXT: addq $32768, %rdi # imm = 0x8000
-; X64-NEXT: cmpq $65536, %rdi # imm = 0x10000
-; X64-NEXT: setb %al
+; X64-NEXT: movswq %di, %rax
+; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
%tmp1 = icmp ult i64 %tmp0, 65536 ; 1U << 16
; X86-LABEL: add_ultcmp_i64_i8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $128, %ecx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: cmpl $256, %ecx # imm = 0x100
-; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: setb %al
+; X86-NEXT: movsbl %al, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_i64_i8:
; X64: # %bb.0:
-; X64-NEXT: subq $-128, %rdi
-; X64-NEXT: cmpq $256, %rdi # imm = 0x100
-; X64-NEXT: setb %al
+; X64-NEXT: movsbq %dil, %rax
+; X64-NEXT: cmpq %rdi, %rax
+; X64-NEXT: sete %al
; X64-NEXT: retq
%tmp0 = add i64 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i64 %tmp0, 256 ; 1U << 8