// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
- N0.hasOneUse()) {
+ N0.hasOneUse() &&
+ // Limit this to after legalization if the add (N) has wrap flags
+ (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
+ !N->getFlags().hasNoSignedWrap()))) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
- N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
+ N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
+ // Limit this to after legalization if the add (N0) has wrap flags
+ (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
+ !N0->getFlags().hasNoSignedWrap()))) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
; CHECK: // %bb.0:
; CHECK-NEXT: shl.4h v0, v0, #8
; CHECK-NEXT: shl.4h v1, v1, #8
+; CHECK-NEXT: movi.4h v2, #1
; CHECK-NEXT: sshr.4h v0, v0, #8
-; CHECK-NEXT: sshr.4h v1, v1, #8
-; CHECK-NEXT: mvn.8b v0, v0
-; CHECK-NEXT: sub.4h v0, v1, v0
+; CHECK-NEXT: ssra.4h v0, v1, #8
+; CHECK-NEXT: add.4h v0, v0, v2
; CHECK-NEXT: ushr.4h v0, v0, #1
; CHECK-NEXT: ret
%zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
; CHECK-NEXT: shl.2s v1, v1, #24
; CHECK-NEXT: sshr.2s v0, v0, #24
; CHECK-NEXT: sshr.2s v1, v1, #24
-; CHECK-NEXT: mvn.8b v0, v0
-; CHECK-NEXT: sub.2s v0, v1, v0
-; CHECK-NEXT: sshr.2s v0, v0, #1
+; CHECK-NEXT: srhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
-; CHECK-NEXT: mvn.8b v0, v0
-; CHECK-NEXT: sub.2s v0, v1, v0
-; CHECK-NEXT: ushr.2s v0, v0, #1
+; CHECK-NEXT: urhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
; CHECK: // %bb.0:
; CHECK-NEXT: shl.2s v0, v0, #24
; CHECK-NEXT: shl.2s v1, v1, #24
-; CHECK-NEXT: movi d2, #0x00ffff0000ffff
+; CHECK-NEXT: movi.2s v2, #1
; CHECK-NEXT: sshr.2s v0, v0, #24
-; CHECK-NEXT: sshr.2s v1, v1, #24
-; CHECK-NEXT: mvn.8b v0, v0
-; CHECK-NEXT: sub.2s v0, v1, v0
-; CHECK-NEXT: and.8b v0, v0, v2
+; CHECK-NEXT: ssra.2s v0, v1, #24
+; CHECK-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-NEXT: add.2s v0, v0, v2
+; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: ushr.2s v0, v0, #1
; CHECK-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
-; CHECK-NEXT: mvn.8b v0, v0
-; CHECK-NEXT: sub.2s v0, v1, v0
-; CHECK-NEXT: ushr.2s v0, v0, #1
+; CHECK-NEXT: urhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
}
define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: rhaddu_v2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
-; CHECK-NEXT: and z1.d, z1.d, #0xffff
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.d, z1.d, z0.d
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: rhaddu_v2i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
+; SVE-NEXT: and z0.d, z0.d, #0xffff
+; SVE-NEXT: and z1.d, z1.d, #0xffff
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.d, z1.d, z0.d
+; SVE-NEXT: lsr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v2i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: and z0.d, z0.d, #0xffff
+; SVE2-NEXT: and z1.d, z1.d, #0xffff
+; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
entry:
%s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
%s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
}
define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: rhaddu_v4i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: and z1.s, z1.s, #0xff
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.s, z1.s, z0.s
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: rhaddu_v4i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
+; SVE-NEXT: and z0.s, z0.s, #0xff
+; SVE-NEXT: and z1.s, z1.s, #0xff
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.s, z1.s, z0.s
+; SVE-NEXT: lsr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v4i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: and z0.s, z0.s, #0xff
+; SVE2-NEXT: and z1.s, z1.s, #0xff
+; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
entry:
%s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
%s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>