// Instantiate the register/scalar saturating-doubling-multiply-accumulate
// multiclass for vqrdmlash.  NOTE(review): MVE_VQDMLAH_qr_types and the
// meaning of the two bit arguments (encoding-variant selectors, presumably
// U/subtract bits) are defined outside this chunk — confirm against the
// multiclass definition.
defm MVE_VQRDMLASH_qr : MVE_VQDMLAH_qr_types<"vqrdmlash", 0b0, 0b1>;
// MVE VIDUP/VDDUP (vector increment/decrement with duplicate) base class.
//
// The instruction produces TWO results: the vector of lanes ($Qd) and the
// updated scalar ($Rn), with the scalar modelled as read-modify-write via
// the "$Rn = $Rn_src" constraint.  bit_12 distinguishes the increment
// (vidup) form from the decrement (vddup) form.
//
// NOTE(review): the `-`/`+` prefixed lines below are unified-diff residue.
// The patch replaces the opaque `list<dag> pattern` parameter with a typed
// ISel pattern: `VT` is the result vector type and `vxdup` the DAG node to
// select; the three-operand `(set Qd, Rn, (vxdup ...))` form is the
// standard TableGen spelling for a two-result instruction.  The `+` lines
// are the post-patch state.
class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
- list<dag> pattern=[]>
+ ValueType VT, SDPatternOperator vxdup>
: MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
(ins tGPREven:$Rn_src, MVE_VIDUP_imm:$imm), NoItinerary,
iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src",
- pattern> {
+ [(set (VT MQPR:$Qd), (i32 tGPREven:$Rn),
+ (vxdup (i32 tGPREven:$Rn_src), (i32 imm:$imm)))]> {
// Encoding fields (bit positions are assigned outside this visible chunk).
bits<4> Qd;
bits<4> Rn;
bits<2> imm;
// Pure value computation: no memory access or other side effects.
let hasSideEffects = 0;
}
// Concrete VIDUP/VDDUP instantiations, one per unsigned element size
// (u8/u16/u32 — the 0b00/0b01/0b10 size field).
// NOTE(review): diff residue — the `+` lines add the new VT/pattern
// arguments introduced by the class change above.  VIDUP selects the
// ARMvidup DAG node; VDDUP passes null_frag, i.e. no selection pattern is
// generated for it here (assembler/intrinsic use only at this point).
-def MVE_VIDUPu8 : MVE_VxDUP<"vidup", "u8", 0b00, 0b0>;
-def MVE_VIDUPu16 : MVE_VxDUP<"vidup", "u16", 0b01, 0b0>;
-def MVE_VIDUPu32 : MVE_VxDUP<"vidup", "u32", 0b10, 0b0>;
+def MVE_VIDUPu8 : MVE_VxDUP<"vidup", "u8", 0b00, 0b0, v16i8, ARMvidup>;
+def MVE_VIDUPu16 : MVE_VxDUP<"vidup", "u16", 0b01, 0b0, v8i16, ARMvidup>;
+def MVE_VIDUPu32 : MVE_VxDUP<"vidup", "u32", 0b10, 0b0, v4i32, ARMvidup>;
-def MVE_VDDUPu8 : MVE_VxDUP<"vddup", "u8", 0b00, 0b1>;
-def MVE_VDDUPu16 : MVE_VxDUP<"vddup", "u16", 0b01, 0b1>;
-def MVE_VDDUPu32 : MVE_VxDUP<"vddup", "u32", 0b10, 0b1>;
+def MVE_VDDUPu8 : MVE_VxDUP<"vddup", "u8", 0b00, 0b1, v16i8, null_frag>;
+def MVE_VDDUPu16 : MVE_VxDUP<"vddup", "u16", 0b01, 0b1, v8i16, null_frag>;
+def MVE_VDDUPu32 : MVE_VxDUP<"vddup", "u32", 0b10, 0b1, v4i32, null_frag>;
class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12,
list<dag> pattern=[]>
; CHECK-NEXT: it ls
; CHECK-NEXT: popls {r4, pc}
; CHECK-NEXT: .LBB0_1: @ %while.body.preheader
-; CHECK-NEXT: subs r0, r0, r1
-; CHECK-NEXT: movs r3, #1
-; CHECK-NEXT: add.w r2, r0, #15
-; CHECK-NEXT: mov r12, r1
-; CHECK-NEXT: bic r2, r2, #15
-; CHECK-NEXT: subs r2, #16
-; CHECK-NEXT: add.w r3, r3, r2, lsr #4
+; CHECK-NEXT: subs r4, r0, r1
; CHECK-NEXT: movs r2, #0
-; CHECK-NEXT: dls lr, r3
+; CHECK-NEXT: mov r3, r1
+; CHECK-NEXT: dlstp.8 lr, r4
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: adds r3, r1, r2
-; CHECK-NEXT: vctp.8 r0
-; CHECK-NEXT: vmov.8 q0[0], r3
-; CHECK-NEXT: adds r4, r3, #1
-; CHECK-NEXT: vmov.8 q0[1], r4
-; CHECK-NEXT: adds r4, r3, #2
-; CHECK-NEXT: vmov.8 q0[2], r4
-; CHECK-NEXT: adds r4, r3, #3
-; CHECK-NEXT: vmov.8 q0[3], r4
-; CHECK-NEXT: adds r4, r3, #4
-; CHECK-NEXT: vmov.8 q0[4], r4
-; CHECK-NEXT: adds r4, r3, #5
-; CHECK-NEXT: vmov.8 q0[5], r4
-; CHECK-NEXT: adds r4, r3, #6
-; CHECK-NEXT: vmov.8 q0[6], r4
-; CHECK-NEXT: adds r4, r3, #7
-; CHECK-NEXT: vmov.8 q0[7], r4
-; CHECK-NEXT: add.w r4, r3, #8
-; CHECK-NEXT: vmov.8 q0[8], r4
-; CHECK-NEXT: add.w r4, r3, #9
-; CHECK-NEXT: vmov.8 q0[9], r4
-; CHECK-NEXT: add.w r4, r3, #10
-; CHECK-NEXT: vmov.8 q0[10], r4
-; CHECK-NEXT: add.w r4, r3, #11
-; CHECK-NEXT: vmov.8 q0[11], r4
-; CHECK-NEXT: add.w r4, r3, #12
-; CHECK-NEXT: vmov.8 q0[12], r4
-; CHECK-NEXT: add.w r4, r3, #13
-; CHECK-NEXT: vmov.8 q0[13], r4
-; CHECK-NEXT: add.w r4, r3, #14
+; CHECK-NEXT: adds r0, r1, r2
; CHECK-NEXT: adds r2, #16
-; CHECK-NEXT: subs r0, #16
-; CHECK-NEXT: vmov.8 q0[14], r4
-; CHECK-NEXT: adds r3, #15
-; CHECK-NEXT: vmov.8 q0[15], r3
-; CHECK-NEXT: vpst
-; CHECK-NEXT: vstrbt.8 q0, [r12], #16
-; CHECK-NEXT: le lr, .LBB0_2
+; CHECK-NEXT: vidup.u8 q0, r0, #1
+; CHECK-NEXT: vstrb.8 q0, [r3], #16
+; CHECK-NEXT: letp lr, .LBB0_2
; CHECK-NEXT: @ %bb.3: @ %while.end
; CHECK-NEXT: pop {r4, pc}
entry:
define arm_aapcs_vfpcc <4 x i32> @vidup_v4i32_1(i32 %index) {
; CHECK-LABEL: vidup_v4i32_1:
; CHECK: @ %bb.0:
-; CHECK-NEXT: adds r1, r0, #2
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT: adds r1, r0, #3
-; CHECK-NEXT: adds r0, #1
-; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
+; CHECK-NEXT: vidup.u32 q0, r0, #1
; CHECK-NEXT: bx lr
%a1 = add i32 %index, 1
%a2 = add i32 %index, 2
define arm_aapcs_vfpcc <4 x i32> @vidup_v4i32_2(i32 %index) {
; CHECK-LABEL: vidup_v4i32_2:
; CHECK: @ %bb.0:
-; CHECK-NEXT: adds r1, r0, #4
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT: adds r1, r0, #6
-; CHECK-NEXT: adds r0, #2
-; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
+; CHECK-NEXT: vidup.u32 q0, r0, #2
; CHECK-NEXT: bx lr
%a1 = add i32 %index, 2
%a2 = add i32 %index, 4
define arm_aapcs_vfpcc <4 x i32> @vidup_v4i32_4(i32 %index) {
; CHECK-LABEL: vidup_v4i32_4:
; CHECK: @ %bb.0:
-; CHECK-NEXT: add.w r1, r0, #8
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT: add.w r1, r0, #12
-; CHECK-NEXT: adds r0, #4
-; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
+; CHECK-NEXT: vidup.u32 q0, r0, #4
; CHECK-NEXT: bx lr
%a1 = add i32 %index, 4
%a2 = add i32 %index, 8
define arm_aapcs_vfpcc <4 x i32> @vidup_v4i32_8(i32 %index) {
; CHECK-LABEL: vidup_v4i32_8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: add.w r1, r0, #16
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT: add.w r1, r0, #24
-; CHECK-NEXT: adds r0, #8
-; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
+; CHECK-NEXT: vidup.u32 q0, r0, #8
; CHECK-NEXT: bx lr
%a1 = add i32 %index, 8
%a2 = add i32 %index, 16
define arm_aapcs_vfpcc <8 x i16> @vidup_v8i16_1(i16 %index) {
; CHECK-LABEL: vidup_v8i16_1:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.16 q0[0], r0
-; CHECK-NEXT: adds r1, r0, #1
-; CHECK-NEXT: vmov.16 q0[1], r1
-; CHECK-NEXT: adds r1, r0, #2
-; CHECK-NEXT: vmov.16 q0[2], r1
-; CHECK-NEXT: adds r1, r0, #3
-; CHECK-NEXT: vmov.16 q0[3], r1
-; CHECK-NEXT: adds r1, r0, #4
-; CHECK-NEXT: vmov.16 q0[4], r1
-; CHECK-NEXT: adds r1, r0, #5
-; CHECK-NEXT: vmov.16 q0[5], r1
-; CHECK-NEXT: adds r1, r0, #6
-; CHECK-NEXT: vmov.16 q0[6], r1
-; CHECK-NEXT: adds r0, #7
-; CHECK-NEXT: vmov.16 q0[7], r0
+; CHECK-NEXT: vidup.u16 q0, r0, #1
; CHECK-NEXT: bx lr
%a1 = add i16 %index, 1
%a2 = add i16 %index, 2
define arm_aapcs_vfpcc <8 x i16> @vidup_v8i16_2(i16 %index) {
; CHECK-LABEL: vidup_v8i16_2:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.16 q0[0], r0
-; CHECK-NEXT: adds r1, r0, #2
-; CHECK-NEXT: vmov.16 q0[1], r1
-; CHECK-NEXT: adds r1, r0, #4
-; CHECK-NEXT: vmov.16 q0[2], r1
-; CHECK-NEXT: adds r1, r0, #6
-; CHECK-NEXT: vmov.16 q0[3], r1
-; CHECK-NEXT: add.w r1, r0, #8
-; CHECK-NEXT: vmov.16 q0[4], r1
-; CHECK-NEXT: add.w r1, r0, #10
-; CHECK-NEXT: vmov.16 q0[5], r1
-; CHECK-NEXT: add.w r1, r0, #12
-; CHECK-NEXT: vmov.16 q0[6], r1
-; CHECK-NEXT: adds r0, #14
-; CHECK-NEXT: vmov.16 q0[7], r0
+; CHECK-NEXT: vidup.u16 q0, r0, #2
; CHECK-NEXT: bx lr
%a1 = add i16 %index, 2
%a2 = add i16 %index, 4
define arm_aapcs_vfpcc <8 x i16> @vidup_v8i16_4(i16 %index) {
; CHECK-LABEL: vidup_v8i16_4:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.16 q0[0], r0
-; CHECK-NEXT: adds r1, r0, #4
-; CHECK-NEXT: vmov.16 q0[1], r1
-; CHECK-NEXT: add.w r1, r0, #8
-; CHECK-NEXT: vmov.16 q0[2], r1
-; CHECK-NEXT: add.w r1, r0, #12
-; CHECK-NEXT: vmov.16 q0[3], r1
-; CHECK-NEXT: add.w r1, r0, #16
-; CHECK-NEXT: vmov.16 q0[4], r1
-; CHECK-NEXT: add.w r1, r0, #20
-; CHECK-NEXT: vmov.16 q0[5], r1
-; CHECK-NEXT: add.w r1, r0, #24
-; CHECK-NEXT: vmov.16 q0[6], r1
-; CHECK-NEXT: adds r0, #28
-; CHECK-NEXT: vmov.16 q0[7], r0
+; CHECK-NEXT: vidup.u16 q0, r0, #4
; CHECK-NEXT: bx lr
%a1 = add i16 %index, 4
%a2 = add i16 %index, 8
define arm_aapcs_vfpcc <8 x i16> @vidup_v8i16_8(i16 %index) {
; CHECK-LABEL: vidup_v8i16_8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.16 q0[0], r0
-; CHECK-NEXT: add.w r1, r0, #8
-; CHECK-NEXT: vmov.16 q0[1], r1
-; CHECK-NEXT: add.w r1, r0, #16
-; CHECK-NEXT: vmov.16 q0[2], r1
-; CHECK-NEXT: add.w r1, r0, #24
-; CHECK-NEXT: vmov.16 q0[3], r1
-; CHECK-NEXT: add.w r1, r0, #32
-; CHECK-NEXT: vmov.16 q0[4], r1
-; CHECK-NEXT: add.w r1, r0, #40
-; CHECK-NEXT: vmov.16 q0[5], r1
-; CHECK-NEXT: add.w r1, r0, #48
-; CHECK-NEXT: vmov.16 q0[6], r1
-; CHECK-NEXT: adds r0, #56
-; CHECK-NEXT: vmov.16 q0[7], r0
+; CHECK-NEXT: vidup.u16 q0, r0, #8
; CHECK-NEXT: bx lr
%a1 = add i16 %index, 8
%a2 = add i16 %index, 16
define arm_aapcs_vfpcc <16 x i8> @vidup_v16i8_1(i8 %index) {
; CHECK-LABEL: vidup_v16i8_1:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.8 q0[0], r0
-; CHECK-NEXT: adds r1, r0, #1
-; CHECK-NEXT: vmov.8 q0[1], r1
-; CHECK-NEXT: adds r1, r0, #2
-; CHECK-NEXT: vmov.8 q0[2], r1
-; CHECK-NEXT: adds r1, r0, #3
-; CHECK-NEXT: vmov.8 q0[3], r1
-; CHECK-NEXT: adds r1, r0, #4
-; CHECK-NEXT: vmov.8 q0[4], r1
-; CHECK-NEXT: adds r1, r0, #5
-; CHECK-NEXT: vmov.8 q0[5], r1
-; CHECK-NEXT: adds r1, r0, #6
-; CHECK-NEXT: vmov.8 q0[6], r1
-; CHECK-NEXT: adds r1, r0, #7
-; CHECK-NEXT: vmov.8 q0[7], r1
-; CHECK-NEXT: add.w r1, r0, #8
-; CHECK-NEXT: vmov.8 q0[8], r1
-; CHECK-NEXT: add.w r1, r0, #9
-; CHECK-NEXT: vmov.8 q0[9], r1
-; CHECK-NEXT: add.w r1, r0, #10
-; CHECK-NEXT: vmov.8 q0[10], r1
-; CHECK-NEXT: add.w r1, r0, #11
-; CHECK-NEXT: vmov.8 q0[11], r1
-; CHECK-NEXT: add.w r1, r0, #12
-; CHECK-NEXT: vmov.8 q0[12], r1
-; CHECK-NEXT: add.w r1, r0, #13
-; CHECK-NEXT: vmov.8 q0[13], r1
-; CHECK-NEXT: add.w r1, r0, #14
-; CHECK-NEXT: vmov.8 q0[14], r1
-; CHECK-NEXT: adds r0, #15
-; CHECK-NEXT: vmov.8 q0[15], r0
+; CHECK-NEXT: vidup.u8 q0, r0, #1
; CHECK-NEXT: bx lr
%a1 = add i8 %index, 1
%a2 = add i8 %index, 2
define arm_aapcs_vfpcc <16 x i8> @vidup_v16i8_4(i8 %index) {
; CHECK-LABEL: vidup_v16i8_4:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.8 q0[0], r0
-; CHECK-NEXT: adds r1, r0, #4
-; CHECK-NEXT: vmov.8 q0[1], r1
-; CHECK-NEXT: add.w r1, r0, #8
-; CHECK-NEXT: vmov.8 q0[2], r1
-; CHECK-NEXT: add.w r1, r0, #12
-; CHECK-NEXT: vmov.8 q0[3], r1
-; CHECK-NEXT: add.w r1, r0, #16
-; CHECK-NEXT: vmov.8 q0[4], r1
-; CHECK-NEXT: add.w r1, r0, #20
-; CHECK-NEXT: vmov.8 q0[5], r1
-; CHECK-NEXT: add.w r1, r0, #24
-; CHECK-NEXT: vmov.8 q0[6], r1
-; CHECK-NEXT: add.w r1, r0, #28
-; CHECK-NEXT: vmov.8 q0[7], r1
-; CHECK-NEXT: add.w r1, r0, #32
-; CHECK-NEXT: vmov.8 q0[8], r1
-; CHECK-NEXT: add.w r1, r0, #36
-; CHECK-NEXT: vmov.8 q0[9], r1
-; CHECK-NEXT: add.w r1, r0, #40
-; CHECK-NEXT: vmov.8 q0[10], r1
-; CHECK-NEXT: add.w r1, r0, #44
-; CHECK-NEXT: vmov.8 q0[11], r1
-; CHECK-NEXT: add.w r1, r0, #48
-; CHECK-NEXT: vmov.8 q0[12], r1
-; CHECK-NEXT: add.w r1, r0, #52
-; CHECK-NEXT: vmov.8 q0[13], r1
-; CHECK-NEXT: add.w r1, r0, #56
-; CHECK-NEXT: vmov.8 q0[14], r1
-; CHECK-NEXT: adds r0, #60
-; CHECK-NEXT: vmov.8 q0[15], r0
+; CHECK-NEXT: vidup.u8 q0, r0, #4
; CHECK-NEXT: bx lr
%a1 = add i8 %index, 4
%a2 = add i8 %index, 8