let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))),
(i32 (MVE_VMLADAVu32 $src1, $src2))>;
- def : Pat<(i32 (ARMVMLAVs (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(i32 (ARMVMLAVu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(i32 (vecreduce_add (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)))),
+ (i32 (MVE_VMLADAVu16 $src1, $src2))>;
def : Pat<(i32 (ARMVMLAVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
(i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(i32 (ARMVMLAVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
(i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(i32 (vecreduce_add (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)))),
+ (i32 (MVE_VMLADAVu8 $src1, $src2))>;
+ def : Pat<(i32 (ARMVMLAVs (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(i32 (ARMVMLAVu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(i32 (add (i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))),
(i32 tGPREven:$src3))),
(i32 (MVE_VMLADAVau32 $src3, $src1, $src2))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau16 $src3, $src1, $src2))>;
def : Pat<(i32 (add (ARMVMLAVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)),
(i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(i32 (add (ARMVMLAVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)),
(i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau8 $src3, $src1, $src2))>;
def : Pat<(i32 (add (ARMVMLAVs (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)),
(i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(i32 (add (ARMVMLAVu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)),
define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: add_v8i16_v8i16:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmul.i16 q0, q0, q1
-; CHECK-NEXT: vaddv.u16 r0, q0
+; CHECK-NEXT: vmlav.u16 r0, q0, q1
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: add_v8i8_v8i16_zext:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmullb.u8 q0, q0, q1
-; CHECK-NEXT: vaddv.u16 r0, q0
+; CHECK-NEXT: vmovlb.u8 q1, q1
+; CHECK-NEXT: vmovlb.u8 q0, q0
+; CHECK-NEXT: vmlav.u16 r0, q0, q1
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: add_v8i8_v8i16_sext:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmullb.s8 q0, q0, q1
-; CHECK-NEXT: vaddv.u16 r0, q0
+; CHECK-NEXT: vmovlb.s8 q1, q1
+; CHECK-NEXT: vmovlb.s8 q0, q0
+; CHECK-NEXT: vmlav.u16 r0, q0, q1
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_v16i8_v16i8:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmul.i8 q0, q0, q1
-; CHECK-NEXT: vaddv.u8 r0, q0
+; CHECK-NEXT: vmlav.u8 r0, q0, q1
; CHECK-NEXT: uxtb r0, r0
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, <8 x i16> %y, i16 %a) {
; CHECK-LABEL: add_v8i16_v8i16_acc:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmul.i16 q0, q0, q1
-; CHECK-NEXT: vaddva.u16 r0, q0
+; CHECK-NEXT: vmlava.u16 r0, q0, q1
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, <8 x i8> %y, i16 %a) {
; CHECK-LABEL: add_v8i8_v8i16_acc_zext:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmullb.u8 q0, q0, q1
-; CHECK-NEXT: vaddva.u16 r0, q0
+; CHECK-NEXT: vmovlb.u8 q1, q1
+; CHECK-NEXT: vmovlb.u8 q0, q0
+; CHECK-NEXT: vmlava.u16 r0, q0, q1
; CHECK-NEXT: uxth r0, r0
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, <8 x i8> %y, i16 %a) {
; CHECK-LABEL: add_v8i8_v8i16_acc_sext:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmullb.s8 q0, q0, q1
-; CHECK-NEXT: vaddva.u16 r0, q0
+; CHECK-NEXT: vmovlb.s8 q1, q1
+; CHECK-NEXT: vmovlb.s8 q0, q0
+; CHECK-NEXT: vmlava.u16 r0, q0, q1
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: bx lr
entry:
define arm_aapcs_vfpcc zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, <16 x i8> %y, i8 %a) {
; CHECK-LABEL: add_v16i8_v16i8_acc:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmul.i8 q0, q0, q1
-; CHECK-NEXT: vaddva.u8 r0, q0
+; CHECK-NEXT: vmlava.u8 r0, q0, q1
; CHECK-NEXT: uxtb r0, r0
; CHECK-NEXT: bx lr
entry: