; CHECK-NEXT: shl.2s v0, v0, #24
; CHECK-NEXT: shl.2s v1, v1, #24
; CHECK-NEXT: sshr.2s v0, v0, #24
-; CHECK-NEXT: ssra.2s v0, v1, #24
-; CHECK-NEXT: sshr.2s v0, v0, #1
+; CHECK-NEXT: sshr.2s v1, v1, #24
+; CHECK-NEXT: shadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
-; CHECK-NEXT: add.2s v0, v0, v1
-; CHECK-NEXT: ushr.2s v0, v0, #1
+; CHECK-NEXT: uhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
-; CHECK-NEXT: add.2s v0, v0, v1
-; CHECK-NEXT: ushr.2s v0, v0, #1
+; CHECK-NEXT: uhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
; CHECK-NEXT: shl.2s v1, v1, #24
; CHECK-NEXT: sshr.2s v0, v0, #24
; CHECK-NEXT: sshr.2s v1, v1, #24
-; CHECK-NEXT: mvn.8b v0, v0
-; CHECK-NEXT: sub.2s v0, v1, v0
-; CHECK-NEXT: sshr.2s v0, v0, #1
+; CHECK-NEXT: srhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
-; CHECK-NEXT: mvn.8b v0, v0
-; CHECK-NEXT: sub.2s v0, v1, v0
-; CHECK-NEXT: ushr.2s v0, v0, #1
+; CHECK-NEXT: urhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
-; CHECK-NEXT: mvn.8b v0, v0
-; CHECK-NEXT: sub.2s v0, v1, v0
-; CHECK-NEXT: ushr.2s v0, v0, #1
+; CHECK-NEXT: urhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
}
define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: hadds_v2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxth z0.d, p0/m, z0.d
-; CHECK-NEXT: sxth z1.d, p0/m, z1.d
-; CHECK-NEXT: add z0.d, z0.d, z1.d
-; CHECK-NEXT: asr z0.d, z0.d, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: hadds_v2i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.d
+; SVE-NEXT: sxth z0.d, p0/m, z0.d
+; SVE-NEXT: sxth z1.d, p0/m, z1.d
+; SVE-NEXT: add z0.d, z0.d, z1.d
+; SVE-NEXT: asr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v2i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: sxth z0.d, p0/m, z0.d
+; SVE2-NEXT: sxth z1.d, p0/m, z1.d
+; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
entry:
%s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
%s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
}
define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: haddu_v2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
-; CHECK-NEXT: and z1.d, z1.d, #0xffff
-; CHECK-NEXT: add z0.d, z0.d, z1.d
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: haddu_v2i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: and z0.d, z0.d, #0xffff
+; SVE-NEXT: and z1.d, z1.d, #0xffff
+; SVE-NEXT: add z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v2i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: and z0.d, z0.d, #0xffff
+; SVE2-NEXT: and z1.d, z1.d, #0xffff
+; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
entry:
%s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
%s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
}
define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: hadds_v4i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
-; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
-; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: asr z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: hadds_v4i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.s
+; SVE-NEXT: sxtb z0.s, p0/m, z0.s
+; SVE-NEXT: sxtb z1.s, p0/m, z1.s
+; SVE-NEXT: add z0.s, z0.s, z1.s
+; SVE-NEXT: asr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v4i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: sxtb z0.s, p0/m, z0.s
+; SVE2-NEXT: sxtb z1.s, p0/m, z1.s
+; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
entry:
%s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
%s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
}
define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: haddu_v4i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: and z1.s, z1.s, #0xff
-; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: haddu_v4i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: and z0.s, z0.s, #0xff
+; SVE-NEXT: and z1.s, z1.s, #0xff
+; SVE-NEXT: add z0.s, z0.s, z1.s
+; SVE-NEXT: lsr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v4i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: and z0.s, z0.s, #0xff
+; SVE2-NEXT: and z1.s, z1.s, #0xff
+; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
entry:
%s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
%s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
}
define <vscale x 2 x i32> @rhadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
-; CHECK-LABEL: rhadds_v2i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
-; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.d, z1.d, z0.d
-; CHECK-NEXT: asr z0.d, z0.d, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: rhadds_v2i32:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.d
+; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
+; SVE-NEXT: sxtw z0.d, p0/m, z0.d
+; SVE-NEXT: sxtw z1.d, p0/m, z1.d
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.d, z1.d, z0.d
+; SVE-NEXT: asr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v2i32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: sxtw z0.d, p0/m, z0.d
+; SVE2-NEXT: sxtw z1.d, p0/m, z1.d
+; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
entry:
%s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
%s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
}
define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: rhaddu_v2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
-; CHECK-NEXT: and z1.d, z1.d, #0xffff
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.d, z1.d, z0.d
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: rhaddu_v2i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
+; SVE-NEXT: and z0.d, z0.d, #0xffff
+; SVE-NEXT: and z1.d, z1.d, #0xffff
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.d, z1.d, z0.d
+; SVE-NEXT: lsr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v2i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: and z0.d, z0.d, #0xffff
+; SVE2-NEXT: and z1.d, z1.d, #0xffff
+; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
entry:
%s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
%s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
}
define <vscale x 4 x i16> @rhadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
-; CHECK-LABEL: rhadds_v4i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sxth z0.s, p0/m, z0.s
-; CHECK-NEXT: sxth z1.s, p0/m, z1.s
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.s, z1.s, z0.s
-; CHECK-NEXT: asr z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: rhadds_v4i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.s
+; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
+; SVE-NEXT: sxth z0.s, p0/m, z0.s
+; SVE-NEXT: sxth z1.s, p0/m, z1.s
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.s, z1.s, z0.s
+; SVE-NEXT: asr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v4i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: sxth z0.s, p0/m, z0.s
+; SVE2-NEXT: sxth z1.s, p0/m, z1.s
+; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
entry:
%s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
%s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
}
define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: rhaddu_v4i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: and z1.s, z1.s, #0xff
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.s, z1.s, z0.s
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: rhaddu_v4i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
+; SVE-NEXT: and z0.s, z0.s, #0xff
+; SVE-NEXT: and z1.s, z1.s, #0xff
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.s, z1.s, z0.s
+; SVE-NEXT: lsr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v4i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: and z0.s, z0.s, #0xff
+; SVE2-NEXT: and z1.s, z1.s, #0xff
+; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
entry:
%s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
%s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
}
define <vscale x 8 x i8> @rhadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
-; CHECK-LABEL: rhadds_v8i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
-; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.h, z1.h, z0.h
-; CHECK-NEXT: asr z0.h, z0.h, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: rhadds_v8i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.h
+; SVE-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
+; SVE-NEXT: sxtb z0.h, p0/m, z0.h
+; SVE-NEXT: sxtb z1.h, p0/m, z1.h
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.h, z1.h, z0.h
+; SVE-NEXT: asr z0.h, z0.h, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v8i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.h
+; SVE2-NEXT: sxtb z0.h, p0/m, z0.h
+; SVE2-NEXT: sxtb z1.h, p0/m, z1.h
+; SVE2-NEXT: srhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT: ret
entry:
%s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
%s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>