// Floating-point across-all-lanes minimum reductions (vector -> scalar),
// mapped to the corresponding AArch64 NEON intrinsics: FMINNMV (the "min
// number" form, per int_aarch64_neon_fminnmv) and FMINV (plain fmin form).
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
-// Patterns for uaddv(uaddlp(x)) ==> uaddlv
-def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
- (v4i16 (AArch64uaddv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
- (i64 0))), (i64 0))),
- (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
- (UADDLVv8i8v V64:$op), hsub), ssub)>;
-def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (AArch64uaddlp
- (v16i8 V128:$op))))), (i64 0))),
- (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
- (UADDLVv16i8v V128:$op), hsub), ssub)>;
-def : Pat<(v4i32 (AArch64uaddv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (UADDLVv8i16v V128:$op), ssub)>;
-
-// Patterns for addp(uaddlp(x))) ==> uaddlv
-def : Pat<(v2i32 (AArch64uaddv (v2i32 (AArch64uaddlp (v4i16 V64:$op))))),
- (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (UADDLVv4i16v V64:$op), ssub)>;
-def : Pat<(v2i64 (AArch64uaddv (v2i64 (AArch64uaddlp (v4i32 V128:$op))))),
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (UADDLVv4i32v V128:$op), dsub)>;
+// Fold an across-lanes add of a widening pairwise add into a single
+// widening across-lanes add: [us]addv([us]addlp(x)) ==> [us]addlv(x).
+// Opc is the instruction-name stem ("UADDLV" or "SADDLV"), suffixed per
+// pattern via !cast; addlp is the matching widening-pairwise-add DAG node.
+// Note the outer reduction is matched as AArch64uaddv in every pattern,
+// even for the SADDLV instantiation: once addlp has widened the lanes,
+// the final across-lanes add is the same for signed and unsigned inputs.
+multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
+  // Patterns for addv(addlp(x)) ==> addlv
+  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
+ (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
+ (i64 0))), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
+  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
+  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;
+
+  // Patterns for addp(addlp(x))) ==> addlv
+  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
+ (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
+  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
+}
+
+// Instantiate for both the unsigned (UADDLV) and signed (SADDLV) forms.
+defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
+defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
// Patterns for across-vector intrinsics that have a node equivalent, which
// returns a vector (with only the low lane defined) instead of a scalar.
; CHECK-LABEL: saddlv4h_from_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: saddlp v0.4h, v0.8b
-; CHECK-NEXT: addv h0, v0.4h
+; CHECK-NEXT: saddlv h0, v0.8b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp1 = load <8 x i8>, <8 x i8>* %A
; CHECK-LABEL: saddlv16b_from_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: saddlp v0.8h, v0.16b
-; CHECK-NEXT: addv h0, v0.8h
+; CHECK-NEXT: saddlv h0, v0.16b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp1 = load <16 x i8>, <16 x i8>* %A
; CHECK-LABEL: saddlv8h_from_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: saddlp v0.4s, v0.8h
-; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: saddlv s0, v0.8h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, <8 x i16>* %A
; CHECK-LABEL: saddlv4s_from_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: saddlp v0.2d, v0.4s
-; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: saddlv d0, v0.4s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, <4 x i32>* %A
; CHECK-LABEL: saddlv4h_from_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: saddlp v0.2s, v0.4h
-; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: saddlv s0, v0.4h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%tmp1 = load <4 x i16>, <4 x i16>* %A
define i64 @add_v4i32_v4i64_sext(<4 x i32> %x) {
; CHECK-LABEL: add_v4i32_v4i64_sext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.2d, v0.4s
-; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: saddlv d0, v0.4s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
define i32 @add_v8i16_v8i32_sext(<8 x i16> %x) {
; CHECK-LABEL: add_v8i16_v8i32_sext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.4s, v0.8h
-; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: saddlv s0, v0.8h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
; CHECK-LABEL: add_v4i16_v4i64_sext:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: saddlp v0.2d, v0.4s
-; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: saddlv d0, v0.4s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
; CHECK-BASE-LABEL: add_v8i8_v8i32_sext:
; CHECK-BASE: // %bb.0: // %entry
; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: saddlp v0.4s, v0.8h
-; CHECK-BASE-NEXT: addv s0, v0.4s
+; CHECK-BASE-NEXT: saddlv s0, v0.8h
; CHECK-BASE-NEXT: fmov w0, s0
; CHECK-BASE-NEXT: ret
;
define i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, i64 %a) {
; CHECK-LABEL: add_v4i32_v4i64_acc_sext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.2d, v0.4s
-; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: saddlv d0, v0.4s
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: add x0, x8, x0
; CHECK-NEXT: ret
define i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, i32 %a) {
; CHECK-LABEL: add_v8i16_v8i32_acc_sext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.4s, v0.8h
-; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: saddlv s0, v0.8h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: add w0, w8, w0
; CHECK-NEXT: ret
; CHECK-LABEL: add_v4i16_v4i64_acc_sext:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: saddlp v0.2d, v0.4s
-; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: saddlv d0, v0.4s
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: add x0, x8, x0
; CHECK-NEXT: ret
; CHECK-BASE-LABEL: add_v8i8_v8i32_acc_sext:
; CHECK-BASE: // %bb.0: // %entry
; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-BASE-NEXT: saddlp v0.4s, v0.8h
-; CHECK-BASE-NEXT: addv s0, v0.4s
+; CHECK-BASE-NEXT: saddlv s0, v0.8h
; CHECK-BASE-NEXT: fmov w8, s0
; CHECK-BASE-NEXT: add w0, w8, w0
; CHECK-BASE-NEXT: ret
define signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, i16 %a) {
; CHECK-LABEL: add_v16i8_v16i16_acc_sext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.8h, v0.16b
-; CHECK-NEXT: addv h0, v0.8h
+; CHECK-NEXT: saddlv h0, v0.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: sxth w0, w8
define signext i16 @add_pair_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_pair_v16i8_v16i16_sext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddlp v0.8h, v0.16b
-; CHECK-NEXT: saddlp v1.8h, v1.16b
-; CHECK-NEXT: addv h0, v0.8h
-; CHECK-NEXT: addv h1, v1.8h
+; CHECK-NEXT: saddlv h0, v0.16b
+; CHECK-NEXT: saddlv h1, v1.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: add w8, w8, w9