         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
  }]>;
+// topbitsallzero - Return true if all bits except the lowest bit are known zero
+def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
+  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
+         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
+  }]>;
+def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
+  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
+         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
+  }]>;
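+// A value matching these leaves is known to be either 0 or 1; typical
+// producers are cset/csel of 0 and 1, (zext i1) and (and x, 1).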
+
// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;
+def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
+          (CSINCWr GPR32:$val, WZR, imm:$cc)>;
+def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
+          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
+def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
+          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
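+// With $val known to be 0 or 1 and (AArch64csel 0, 1, cc) evaluating to 0
+// when cc holds and 1 otherwise, the OR is cc ? $val : 1, which is exactly
+// csinc $val, wzr, cc ($val when cc holds, wzr + 1 = 1 otherwise). For
+// example, "or i32 %xa, (zext (icmp eq i32 %y, 0))" with %xa = and i32 %x, 1
+// now selects to "cmp w1, #0; csinc w0, w8, wzr, ne" instead of a cset/orr
+// pair.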
+
+def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
+          (CSELWr WZR, GPR32:$val, imm:$cc)>;
+def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
+          (CSELXr XZR, GPR64:$val, imm:$cc)>;
+def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
+          (CSELXr XZR, GPR64:$val, imm:$cc)>;
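+// Likewise the AND is cc ? 0 : $val, i.e. csel wzr, $val, cc, replacing a
+// cset/and pair.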
+
// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
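// For example, cinc Wd, Wn, cond is an alias for csinc Wd, Wn, Wn, invert(cond).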
; SDISEL-NEXT: cset w8, gt
; SDISEL-NEXT: cmp x2, #2
; SDISEL-NEXT: ccmp x2, #4, #4, lt
-; SDISEL-NEXT: cset w9, gt
-; SDISEL-NEXT: orr w8, w8, w9
+; SDISEL-NEXT: csinc w8, w8, wzr, le
; SDISEL-NEXT: cmp w8, #0
; SDISEL-NEXT: csel x0, xzr, x3, ne
; SDISEL-NEXT: ret
; SDISEL-NEXT: cmp w0, #22
; SDISEL-NEXT: mov w9, #44
; SDISEL-NEXT: ccmp w0, w9, #0, ge
-; SDISEL-NEXT: cset w9, gt
+; SDISEL-NEXT: csel w8, wzr, w8, le
; SDISEL-NEXT: cmp w0, #99
-; SDISEL-NEXT: and w8, w8, w9
; SDISEL-NEXT: mov w9, #77
; SDISEL-NEXT: ccmp w0, w9, #4, ne
; SDISEL-NEXT: cset w9, eq
define i1 @bcmp_i128(i128 %a0, i128 %b0, i128 %a1, i128 %b1, i128 %a2, i128 %b2) {
; CHECK-LABEL: bcmp_i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x9, x8, [sp]
-; CHECK-NEXT: ldp x10, x11, [sp, #16]
-; CHECK-NEXT: cmp x10, x9
-; CHECK-NEXT: ccmp x11, x8, #0, eq
-; CHECK-NEXT: cset w8, ne
; CHECK-NEXT: cmp x2, x0
; CHECK-NEXT: ccmp x3, x1, #0, eq
+; CHECK-NEXT: ldp x9, x8, [sp]
; CHECK-NEXT: ccmp x6, x4, #0, eq
+; CHECK-NEXT: ldp x10, x11, [sp, #16]
; CHECK-NEXT: ccmp x7, x5, #0, eq
-; CHECK-NEXT: cset w9, ne
-; CHECK-NEXT: orr w0, w9, w8
+; CHECK-NEXT: cset w12, ne
+; CHECK-NEXT: cmp x10, x9
+; CHECK-NEXT: ccmp x11, x8, #0, eq
+; CHECK-NEXT: csinc w0, w12, wzr, eq
; CHECK-NEXT: ret
%xor0 = xor i128 %b0, %a0
%xor1 = xor i128 %b1, %a1
define i32 @ori32i32_eq(i32 %x, i32 %y) {
; CHECK-LABEL: ori32i32_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w1, #0
; CHECK-NEXT: and w8, w0, #0x1
-; CHECK-NEXT: cset w9, eq
-; CHECK-NEXT: orr w0, w8, w9
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: csinc w0, w8, wzr, ne
; CHECK-NEXT: ret
%xa = and i32 %x, 1
%c = icmp eq i32 %y, 0
define i32 @ori32_eq_c(i32 %x, i32 %y) {
; CHECK-LABEL: ori32_eq_c:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w1, #0
; CHECK-NEXT: and w8, w0, #0x1
-; CHECK-NEXT: cset w9, eq
-; CHECK-NEXT: orr w0, w9, w8
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: csinc w0, w8, wzr, ne
; CHECK-NEXT: ret
%xa = and i32 %x, 1
%c = icmp eq i32 %y, 0
define i32 @ori32i64_eq(i32 %x, i64 %y) {
; CHECK-LABEL: ori32i64_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x1, #0
; CHECK-NEXT: and w8, w0, #0x1
-; CHECK-NEXT: cset w9, eq
-; CHECK-NEXT: orr w0, w8, w9
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csinc w0, w8, wzr, ne
; CHECK-NEXT: ret
%xa = and i32 %x, 1
%c = icmp eq i64 %y, 0
define i32 @ori32_sgt(i32 %x, i32 %y) {
; CHECK-LABEL: ori32_sgt:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w1, #0
; CHECK-NEXT: and w8, w0, #0x1
-; CHECK-NEXT: cset w9, gt
-; CHECK-NEXT: orr w0, w8, w9
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: csinc w0, w8, wzr, le
; CHECK-NEXT: ret
%xa = and i32 %x, 1
%c = icmp sgt i32 %y, 0
; CHECK-NEXT: tst w0, #0xff
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: tst w1, #0xff
-; CHECK-NEXT: cset w9, ne
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: csel w0, wzr, w8, eq
; CHECK-NEXT: ret
%xc = icmp eq i8 %x, 0
%xa = zext i1 %xc to i32
define i64 @ori64i32_eq(i64 %x, i32 %y) {
; CHECK-LABEL: ori64i32_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w1, #0
; CHECK-NEXT: and x8, x0, #0x1
-; CHECK-NEXT: cset w9, eq
-; CHECK-NEXT: orr x0, x8, x9
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: csinc x0, x8, xzr, ne
; CHECK-NEXT: ret
%xa = and i64 %x, 1
%c = icmp eq i32 %y, 0
define i64 @ori64i64_eq(i64 %x, i64 %y) {
; CHECK-LABEL: ori64i64_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x1, #0
; CHECK-NEXT: and x8, x0, #0x1
-; CHECK-NEXT: cset w9, eq
-; CHECK-NEXT: orr x0, x8, x9
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csinc x0, x8, xzr, ne
; CHECK-NEXT: ret
%xa = and i64 %x, 1
%c = icmp eq i64 %y, 0
define i64 @ori64_eq_c(i64 %x, i32 %y) {
; CHECK-LABEL: ori64_eq_c:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w1, #0
; CHECK-NEXT: and x8, x0, #0x1
-; CHECK-NEXT: cset w9, eq
-; CHECK-NEXT: orr x0, x9, x8
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: csinc x0, x8, xzr, ne
; CHECK-NEXT: ret
%xa = and i64 %x, 1
%c = icmp eq i32 %y, 0
; CHECK-NEXT: tst w0, #0xff
; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: tst w1, #0xff
-; CHECK-NEXT: cset w9, ne
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: csel w0, wzr, w8, eq
; CHECK-NEXT: ret
%xc = icmp eq i8 %x, 0
%xa = zext i1 %xc to i64
define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) {
; CHECK-LABEL: u128_checked_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: mul x8, x3, x0
-; CHECK-NEXT: umulh x9, x0, x2
-; CHECK-NEXT: madd x8, x1, x2, x8
-; CHECK-NEXT: umulh x10, x1, x2
-; CHECK-NEXT: adds x8, x9, x8
-; CHECK-NEXT: cset w9, hs
; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: umulh x8, x1, x2
; CHECK-NEXT: ccmp x3, #0, #4, ne
-; CHECK-NEXT: mov x1, x8
-; CHECK-NEXT: ccmp xzr, x10, #0, eq
-; CHECK-NEXT: umulh x10, x3, x0
+; CHECK-NEXT: mul x9, x3, x0
+; CHECK-NEXT: madd x9, x1, x2, x9
+; CHECK-NEXT: ccmp xzr, x8, #0, eq
+; CHECK-NEXT: umulh x8, x3, x0
+; CHECK-NEXT: ccmp xzr, x8, #0, eq
+; CHECK-NEXT: umulh x8, x0, x2
; CHECK-NEXT: mul x0, x0, x2
-; CHECK-NEXT: ccmp xzr, x10, #0, eq
; CHECK-NEXT: cset w10, ne
-; CHECK-NEXT: orr w9, w10, w9
-; CHECK-NEXT: eor w2, w9, #0x1
+; CHECK-NEXT: adds x1, x8, x9
+; CHECK-NEXT: csinc w8, w10, wzr, lo
+; CHECK-NEXT: eor w2, w8, #0x1
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) {
; CHECK-LABEL: u128_overflowing_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: mul x8, x3, x0
-; CHECK-NEXT: umulh x9, x0, x2
-; CHECK-NEXT: madd x8, x1, x2, x8
-; CHECK-NEXT: umulh x10, x1, x2
-; CHECK-NEXT: adds x8, x9, x8
-; CHECK-NEXT: cset w9, hs
; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: umulh x8, x1, x2
; CHECK-NEXT: ccmp x3, #0, #4, ne
-; CHECK-NEXT: mov x1, x8
-; CHECK-NEXT: ccmp xzr, x10, #0, eq
-; CHECK-NEXT: umulh x10, x3, x0
+; CHECK-NEXT: umulh x9, x3, x0
+; CHECK-NEXT: ccmp xzr, x8, #0, eq
+; CHECK-NEXT: mul x8, x3, x0
+; CHECK-NEXT: madd x8, x1, x2, x8
+; CHECK-NEXT: ccmp xzr, x9, #0, eq
+; CHECK-NEXT: umulh x9, x0, x2
; CHECK-NEXT: mul x0, x0, x2
-; CHECK-NEXT: ccmp xzr, x10, #0, eq
; CHECK-NEXT: cset w10, ne
-; CHECK-NEXT: orr w2, w10, w9
+; CHECK-NEXT: adds x1, x9, x8
+; CHECK-NEXT: csinc w2, w10, wzr, lo
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
define i128 @u128_saturating_mul(i128 %x, i128 %y) {
; CHECK-LABEL: u128_saturating_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: mul x8, x3, x0
-; CHECK-NEXT: umulh x9, x0, x2
-; CHECK-NEXT: madd x8, x1, x2, x8
-; CHECK-NEXT: umulh x10, x1, x2
-; CHECK-NEXT: adds x8, x9, x8
-; CHECK-NEXT: cset w9, hs
; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: umulh x8, x1, x2
; CHECK-NEXT: ccmp x3, #0, #4, ne
-; CHECK-NEXT: ccmp xzr, x10, #0, eq
-; CHECK-NEXT: umulh x10, x3, x0
-; CHECK-NEXT: ccmp xzr, x10, #0, eq
+; CHECK-NEXT: umulh x9, x3, x0
+; CHECK-NEXT: ccmp xzr, x8, #0, eq
+; CHECK-NEXT: mul x8, x3, x0
+; CHECK-NEXT: madd x8, x1, x2, x8
+; CHECK-NEXT: ccmp xzr, x9, #0, eq
+; CHECK-NEXT: umulh x9, x0, x2
; CHECK-NEXT: cset w10, ne
-; CHECK-NEXT: orr w9, w10, w9
+; CHECK-NEXT: adds x8, x9, x8
+; CHECK-NEXT: csinc w9, w10, wzr, lo
; CHECK-NEXT: mul x10, x0, x2
; CHECK-NEXT: cmp w9, #0
; CHECK-NEXT: csinv x0, x10, xzr, eq
define i1 @mul_nsw_eq0_i8(i8 %x, i8 %y) {
; CHECK-LABEL: mul_nsw_eq0_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: tst w1, #0xff
-; CHECK-NEXT: cset w8, eq
; CHECK-NEXT: tst w0, #0xff
-; CHECK-NEXT: cset w9, eq
-; CHECK-NEXT: orr w0, w9, w8
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: tst w1, #0xff
+; CHECK-NEXT: csinc w0, w8, wzr, ne
; CHECK-NEXT: ret
%m = mul nsw i8 %x, %y
%r = icmp eq i8 %m, 0
define i1 @mul_nsw_ne0_i16(i16 %x, i16 %y) {
; CHECK-LABEL: mul_nsw_ne0_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: tst w1, #0xffff
-; CHECK-NEXT: cset w8, ne
; CHECK-NEXT: tst w0, #0xffff
-; CHECK-NEXT: cset w9, ne
-; CHECK-NEXT: and w0, w9, w8
+; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: tst w1, #0xffff
+; CHECK-NEXT: csel w0, wzr, w8, eq
; CHECK-NEXT: ret
%m = mul nsw i16 %x, %y
%r = icmp ne i16 %m, 0
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; AARCH-LABEL: muloti_test:
; AARCH: // %bb.0: // %start
-; AARCH-NEXT: mul x8, x3, x0
-; AARCH-NEXT: umulh x9, x0, x2
-; AARCH-NEXT: madd x8, x1, x2, x8
-; AARCH-NEXT: umulh x10, x1, x2
-; AARCH-NEXT: adds x8, x9, x8
-; AARCH-NEXT: cset w9, hs
; AARCH-NEXT: cmp x1, #0
+; AARCH-NEXT: umulh x8, x1, x2
; AARCH-NEXT: ccmp x3, #0, #4, ne
-; AARCH-NEXT: mov x1, x8
-; AARCH-NEXT: ccmp xzr, x10, #0, eq
-; AARCH-NEXT: umulh x10, x3, x0
+; AARCH-NEXT: umulh x9, x3, x0
+; AARCH-NEXT: ccmp xzr, x8, #0, eq
+; AARCH-NEXT: mul x8, x3, x0
+; AARCH-NEXT: madd x8, x1, x2, x8
+; AARCH-NEXT: ccmp xzr, x9, #0, eq
+; AARCH-NEXT: umulh x9, x0, x2
; AARCH-NEXT: mul x0, x0, x2
-; AARCH-NEXT: ccmp xzr, x10, #0, eq
; AARCH-NEXT: cset w10, ne
-; AARCH-NEXT: orr w2, w10, w9
+; AARCH-NEXT: adds x1, x9, x8
+; AARCH-NEXT: csinc w2, w10, wzr, lo
; AARCH-NEXT: ret
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind {
; CHECK-LABEL: umulo_v2i128:
; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x3, #0
+; CHECK-NEXT: umulh x8, x3, x6
+; CHECK-NEXT: ccmp x7, #0, #4, ne
+; CHECK-NEXT: umulh x9, x7, x2
+; CHECK-NEXT: umulh x11, x5, x0
+; CHECK-NEXT: ccmp xzr, x8, #0, eq
; CHECK-NEXT: mul x8, x7, x2
-; CHECK-NEXT: umulh x9, x2, x6
; CHECK-NEXT: madd x8, x3, x6, x8
-; CHECK-NEXT: umulh x10, x3, x6
+; CHECK-NEXT: ccmp xzr, x9, #0, eq
+; CHECK-NEXT: umulh x9, x2, x6
+; CHECK-NEXT: cset w10, ne
; CHECK-NEXT: adds x8, x9, x8
-; CHECK-NEXT: umulh x11, x7, x2
-; CHECK-NEXT: cset w9, hs
-; CHECK-NEXT: cmp x3, #0
-; CHECK-NEXT: ccmp x7, #0, #4, ne
-; CHECK-NEXT: umulh x13, x1, x4
+; CHECK-NEXT: csinc w9, w10, wzr, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: ccmp x5, #0, #4, ne
+; CHECK-NEXT: umulh x10, x1, x4
; CHECK-NEXT: ccmp xzr, x10, #0, eq
; CHECK-NEXT: mul x10, x5, x0
; CHECK-NEXT: madd x10, x1, x4, x10
; CHECK-NEXT: umulh x11, x0, x4
; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: adds x10, x11, x10
-; CHECK-NEXT: cset w11, hs
-; CHECK-NEXT: cmp x1, #0
-; CHECK-NEXT: ccmp x5, #0, #4, ne
-; CHECK-NEXT: orr w9, w12, w9
+; CHECK-NEXT: csinc w11, w12, wzr, lo
; CHECK-NEXT: mul x12, x0, x4
-; CHECK-NEXT: ccmp xzr, x13, #0, eq
-; CHECK-NEXT: umulh x13, x5, x0
-; CHECK-NEXT: ccmp xzr, x13, #0, eq
-; CHECK-NEXT: cset w13, ne
-; CHECK-NEXT: orr w11, w13, w11
; CHECK-NEXT: fmov s0, w11
; CHECK-NEXT: ldr x11, [sp]
; CHECK-NEXT: mov v0.s[1], w9