ret <16 x i8> %b
}
+; Tail-predicated vector shifts where the select keeps the shifted result in
+; active lanes (vctp mask from %n) and falls back to the first operand %x in
+; inactive lanes. shl lowers directly to vshl; ashr/lshr are lowered as a
+; vector shift-left by the negated amount (vneg + vshl.s/u), then a predicated
+; vmovt merges into q0. NOTE(review): CHECK lines look autogenerated
+; (update_llc_test_checks style) — regenerate rather than hand-edit.
+define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
+; CHECK-LABEL: shl_v4i32_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u32 q1, q0, q1
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = shl <4 x i32> %x, %y
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
+ ret <4 x i32> %b
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
+; CHECK-LABEL: shl_v8i16_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u16 q1, q0, q1
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = shl <8 x i16> %x, %y
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
+ ret <8 x i16> %b
+}
+
+define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
+; CHECK-LABEL: shl_v16i8_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u8 q1, q0, q1
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
+ %a = shl <16 x i8> %x, %y
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
+ ret <16 x i8> %b
+}
+
+; ashr: arithmetic shift right, expressed as a signed vshl by the negated amount.
+define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
+; CHECK-LABEL: ashr_v4i32_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s32 q1, q1
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vshl.s32 q1, q0, q1
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = ashr <4 x i32> %x, %y
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
+ ret <4 x i32> %b
+}
+
+define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
+; CHECK-LABEL: ashr_v8i16_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s16 q1, q1
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vshl.s16 q1, q0, q1
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = ashr <8 x i16> %x, %y
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
+ ret <8 x i16> %b
+}
+
+define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
+; CHECK-LABEL: ashr_v16i8_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s8 q1, q1
+; CHECK-NEXT: vshl.s8 q1, q0, q1
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
+ %a = ashr <16 x i8> %x, %y
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
+ ret <16 x i8> %b
+}
+
+; lshr: logical shift right, expressed as an unsigned vshl by the negated amount.
+define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
+; CHECK-LABEL: lshr_v4i32_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s32 q1, q1
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vshl.u32 q1, q0, q1
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = lshr <4 x i32> %x, %y
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
+ ret <4 x i32> %b
+}
+
+define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
+; CHECK-LABEL: lshr_v8i16_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s16 q1, q1
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vshl.u16 q1, q0, q1
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = lshr <8 x i16> %x, %y
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
+ ret <8 x i16> %b
+}
+
+define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
+; CHECK-LABEL: lshr_v16i8_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s8 q1, q1
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vshl.u8 q1, q0, q1
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
+ %a = lshr <16 x i8> %x, %y
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
+ ret <16 x i8> %b
+}
+
+
define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: andnot_v4i32_x:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %b
}
+; fdiv has no predicated MVE form, so the division is done unpredicated
+; (scalarized vdiv per lane; f16 additionally needs vmovx/vins to reach the
+; odd half-lanes) and only the final merge back into q0 is predicated
+; (vpst + vmovt). Inactive lanes keep %x.
+define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
+; CHECK-LABEL: fdiv_v4f32_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vdiv.f32 s7, s3, s7
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vdiv.f32 s6, s2, s6
+; CHECK-NEXT: vdiv.f32 s5, s1, s5
+; CHECK-NEXT: vdiv.f32 s4, s0, s4
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = fdiv <4 x float> %x, %y
+ %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
+ ret <4 x float> %b
+}
+
+define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
+; CHECK-LABEL: fdiv_v8f16_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmovx.f16 s8, s4
+; CHECK-NEXT: vmovx.f16 s10, s0
+; CHECK-NEXT: vdiv.f16 s8, s10, s8
+; CHECK-NEXT: vdiv.f16 s4, s0, s4
+; CHECK-NEXT: vins.f16 s4, s8
+; CHECK-NEXT: vmovx.f16 s8, s5
+; CHECK-NEXT: vmovx.f16 s10, s1
+; CHECK-NEXT: vdiv.f16 s5, s1, s5
+; CHECK-NEXT: vdiv.f16 s8, s10, s8
+; CHECK-NEXT: vmovx.f16 s10, s2
+; CHECK-NEXT: vins.f16 s5, s8
+; CHECK-NEXT: vmovx.f16 s8, s6
+; CHECK-NEXT: vdiv.f16 s8, s10, s8
+; CHECK-NEXT: vdiv.f16 s6, s2, s6
+; CHECK-NEXT: vins.f16 s6, s8
+; CHECK-NEXT: vmovx.f16 s8, s7
+; CHECK-NEXT: vmovx.f16 s10, s3
+; CHECK-NEXT: vdiv.f16 s7, s3, s7
+; CHECK-NEXT: vdiv.f16 s8, s10, s8
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vins.f16 s7, s8
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = fdiv <8 x half> %x, %y
+ %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
+ ret <8 x half> %b
+}
+
+
define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_x(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
; CHECK-LABEL: fmai_v4f32_x:
; CHECK: @ %bb.0: @ %entry
ret <16 x i8> %b
}
+; "_y" variants: the select's inactive-lane fallback is the second operand %y,
+; so the unpredicated shift result stays in q0 and the predicated vmovt merges
+; it into q1 (which holds %y), followed by a vmov to return in q0. For
+; ashr/lshr the negated shift amount needs a scratch register q2 because q1
+; must be preserved as the fallback value.
+define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
+; CHECK-LABEL: shl_v4i32_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u32 q0, q0, q1
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = shl <4 x i32> %x, %y
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
+ ret <4 x i32> %b
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
+; CHECK-LABEL: shl_v8i16_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u16 q0, q0, q1
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = shl <8 x i16> %x, %y
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
+ ret <8 x i16> %b
+}
+
+define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
+; CHECK-LABEL: shl_v16i8_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u8 q0, q0, q1
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
+ %a = shl <16 x i8> %x, %y
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
+ ret <16 x i8> %b
+}
+
+define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
+; CHECK-LABEL: ashr_v4i32_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s32 q2, q1
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vshl.s32 q0, q0, q2
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = ashr <4 x i32> %x, %y
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
+ ret <4 x i32> %b
+}
+
+define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
+; CHECK-LABEL: ashr_v8i16_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s16 q2, q1
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vshl.s16 q0, q0, q2
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = ashr <8 x i16> %x, %y
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
+ ret <8 x i16> %b
+}
+
+define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
+; CHECK-LABEL: ashr_v16i8_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s8 q2, q1
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vshl.s8 q0, q0, q2
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
+ %a = ashr <16 x i8> %x, %y
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
+ ret <16 x i8> %b
+}
+
+define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
+; CHECK-LABEL: lshr_v4i32_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s32 q2, q1
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vshl.u32 q0, q0, q2
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = lshr <4 x i32> %x, %y
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
+ ret <4 x i32> %b
+}
+
+define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
+; CHECK-LABEL: lshr_v8i16_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s16 q2, q1
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vshl.u16 q0, q0, q2
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = lshr <8 x i16> %x, %y
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
+ ret <8 x i16> %b
+}
+
+define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
+; CHECK-LABEL: lshr_v16i8_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s8 q2, q1
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vshl.u8 q0, q0, q2
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
+ %a = lshr <16 x i8> %x, %y
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
+ ret <16 x i8> %b
+}
+
+
define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: andnot_v4i32_y:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %b
}
+; "_y" floating-point variants: fdiv is scalarized per lane (no predicated MVE
+; vdiv), fma uses the unpredicated vfma accumulating into q0; in both cases a
+; predicated vmovt then merges the result into q1 (%y, the inactive-lane
+; fallback) and q1 is copied back to q0 for the return. fma_v8f16_y/_v4f32_y
+; exercise the fmul+fadd (fast) form being matched to vfma.
+define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
+; CHECK-LABEL: fdiv_v4f32_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vdiv.f32 s3, s3, s7
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vdiv.f32 s2, s2, s6
+; CHECK-NEXT: vdiv.f32 s1, s1, s5
+; CHECK-NEXT: vdiv.f32 s0, s0, s4
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = fdiv <4 x float> %x, %y
+ %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
+ ret <4 x float> %b
+}
+
+define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
+; CHECK-LABEL: fdiv_v8f16_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmovx.f16 s10, s0
+; CHECK-NEXT: vmovx.f16 s8, s4
+; CHECK-NEXT: vdiv.f16 s8, s10, s8
+; CHECK-NEXT: vdiv.f16 s0, s0, s4
+; CHECK-NEXT: vins.f16 s0, s8
+; CHECK-NEXT: vmovx.f16 s10, s1
+; CHECK-NEXT: vmovx.f16 s8, s5
+; CHECK-NEXT: vdiv.f16 s1, s1, s5
+; CHECK-NEXT: vdiv.f16 s8, s10, s8
+; CHECK-NEXT: vmovx.f16 s10, s2
+; CHECK-NEXT: vins.f16 s1, s8
+; CHECK-NEXT: vmovx.f16 s8, s6
+; CHECK-NEXT: vdiv.f16 s8, s10, s8
+; CHECK-NEXT: vdiv.f16 s2, s2, s6
+; CHECK-NEXT: vins.f16 s2, s8
+; CHECK-NEXT: vmovx.f16 s10, s3
+; CHECK-NEXT: vmovx.f16 s8, s7
+; CHECK-NEXT: vdiv.f16 s3, s3, s7
+; CHECK-NEXT: vdiv.f16 s8, s10, s8
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vins.f16 s3, s8
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = fdiv <8 x half> %x, %y
+ %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
+ ret <8 x half> %b
+}
+
+define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
+; CHECK-LABEL: fmai_v4f32_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vfma.f32 q0, q1, q2
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %z, <4 x float> %x)
+ %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
+ ret <4 x float> %b
+}
+
+define arm_aapcs_vfpcc <8 x half> @fmai_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
+; CHECK-LABEL: fmai_v8f16_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vfma.f16 q0, q1, q2
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
+ %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
+ ret <8 x half> %b
+}
+
+define arm_aapcs_vfpcc <4 x float> @fma_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
+; CHECK-LABEL: fma_v4f32_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vfma.f32 q0, q1, q2
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %m = fmul fast <4 x float> %y, %z
+ %a = fadd fast <4 x float> %m, %x
+ %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
+ ret <4 x float> %b
+}
+
+define arm_aapcs_vfpcc <8 x half> @fma_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
+; CHECK-LABEL: fma_v8f16_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vfma.f16 q0, q1, q2
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %m = fmul fast <8 x half> %y, %z
+ %a = fadd fast <8 x half> %m, %x
+ %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
+ ret <8 x half> %b
+}
+
+
define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v4i32_y:
; CHECK: @ %bb.0: @ %entry
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
+
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
ret <16 x i8> %b
}
+; Here the select is applied to the shift AMOUNT before the shift: inactive
+; lanes get a zero amount (identity for shifts), so the whole operation can be
+; done with a predicated merge into a zeroed q2 (vmovt for shl; vsubt from
+; zero for ashr/lshr, which also produces the negated amount the vshl.s/u
+; lowering needs) followed by one unpredicated vshl.
+define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
+; CHECK-LABEL: shl_v4i32_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q2, #0x0
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q2, q1
+; CHECK-NEXT: vshl.u32 q0, q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
+ %b = shl <4 x i32> %x, %a
+ ret <4 x i32> %b
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
+; CHECK-LABEL: shl_v8i16_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q2, #0x0
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q2, q1
+; CHECK-NEXT: vshl.u16 q0, q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
+ %b = shl <8 x i16> %x, %a
+ ret <8 x i16> %b
+}
+
+define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
+; CHECK-LABEL: shl_v16i8_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q2, #0x0
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q2, q1
+; CHECK-NEXT: vshl.u8 q0, q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
+ %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
+ %b = shl <16 x i8> %x, %a
+ ret <16 x i8> %b
+}
+
+define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
+; CHECK-LABEL: ashr_v4i32_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q2, #0x0
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vsubt.i32 q2, q2, q1
+; CHECK-NEXT: vshl.s32 q0, q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
+ %b = ashr <4 x i32> %x, %a
+ ret <4 x i32> %b
+}
+
+define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
+; CHECK-LABEL: ashr_v8i16_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q2, #0x0
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vsubt.i16 q2, q2, q1
+; CHECK-NEXT: vshl.s16 q0, q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
+ %b = ashr <8 x i16> %x, %a
+ ret <8 x i16> %b
+}
+
+define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
+; CHECK-LABEL: ashr_v16i8_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q2, #0x0
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vsubt.i8 q2, q2, q1
+; CHECK-NEXT: vshl.s8 q0, q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
+ %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
+ %b = ashr <16 x i8> %x, %a
+ ret <16 x i8> %b
+}
+
+define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
+; CHECK-LABEL: lshr_v4i32_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q2, #0x0
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vsubt.i32 q2, q2, q1
+; CHECK-NEXT: vshl.u32 q0, q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
+ %b = lshr <4 x i32> %x, %a
+ ret <4 x i32> %b
+}
+
+define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
+; CHECK-LABEL: lshr_v8i16_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q2, #0x0
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vsubt.i16 q2, q2, q1
+; CHECK-NEXT: vshl.u16 q0, q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
+ %b = lshr <8 x i16> %x, %a
+ ret <8 x i16> %b
+}
+
+define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
+; CHECK-LABEL: lshr_v16i8_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q2, #0x0
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vsubt.i8 q2, q2, q1
+; CHECK-NEXT: vshl.u8 q0, q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
+ %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
+ %b = lshr <16 x i8> %x, %a
+ ret <16 x i8> %b
+}
+
+
define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: andnot_v4i32_x:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %b
}
+; Select applied to the DIVISOR with the multiplicative identity 1.0 for
+; inactive lanes (f16 encodes 1.0 as the 0x3c00 bit pattern), so the divide
+; itself can run unpredicated on the merged divisor; the per-lane vdiv
+; scalarization is still required since MVE has no vector fdiv.
+define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
+; CHECK-LABEL: fdiv_v4f32_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q2, q1
+; CHECK-NEXT: vdiv.f32 s3, s3, s11
+; CHECK-NEXT: vdiv.f32 s2, s2, s10
+; CHECK-NEXT: vdiv.f32 s1, s1, s9
+; CHECK-NEXT: vdiv.f32 s0, s0, s8
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ %b = fdiv <4 x float> %x, %a
+ ret <4 x float> %b
+}
+
+define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
+; CHECK-LABEL: fdiv_v8f16_x:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i16 q2, #0x3c00
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q2, q1
+; CHECK-NEXT: vmovx.f16 s6, s0
+; CHECK-NEXT: vmovx.f16 s4, s8
+; CHECK-NEXT: vdiv.f16 s0, s0, s8
+; CHECK-NEXT: vdiv.f16 s4, s6, s4
+; CHECK-NEXT: vmovx.f16 s6, s1
+; CHECK-NEXT: vins.f16 s0, s4
+; CHECK-NEXT: vmovx.f16 s4, s9
+; CHECK-NEXT: vdiv.f16 s4, s6, s4
+; CHECK-NEXT: vdiv.f16 s1, s1, s9
+; CHECK-NEXT: vins.f16 s1, s4
+; CHECK-NEXT: vmovx.f16 s6, s2
+; CHECK-NEXT: vmovx.f16 s4, s10
+; CHECK-NEXT: vdiv.f16 s2, s2, s10
+; CHECK-NEXT: vdiv.f16 s4, s6, s4
+; CHECK-NEXT: vmovx.f16 s6, s3
+; CHECK-NEXT: vins.f16 s2, s4
+; CHECK-NEXT: vmovx.f16 s4, s11
+; CHECK-NEXT: vdiv.f16 s4, s6, s4
+; CHECK-NEXT: vdiv.f16 s3, s3, s11
+; CHECK-NEXT: vins.f16 s3, s4
+; CHECK-NEXT: bx lr
+entry:
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = select <8 x i1> %c, <8 x half> %y, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
+ %b = fdiv <8 x half> %x, %a
+ ret <8 x half> %b
+}
+
+
define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_x(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
; CHECK-LABEL: fmai_v4f32_x:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %a1 = icmp slt <4 x i32> %x, %y
- %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
+ %a = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> %y)
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
ret <4 x i32> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %a1 = icmp slt <8 x i16> %x, %y
- %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
+ %a = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %x, <8 x i16> %y)
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
ret <8 x i16> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %a1 = icmp slt <16 x i8> %x, %y
- %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
+ %a = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y)
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
ret <16 x i8> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %a1 = icmp sgt <4 x i32> %x, %y
- %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
+ %a = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> %y)
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
ret <4 x i32> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %a1 = icmp sgt <8 x i16> %x, %y
- %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
+ %a = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %x, <8 x i16> %y)
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
ret <8 x i16> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %a1 = icmp sgt <16 x i8> %x, %y
- %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
+ %a = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
ret <16 x i8> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %a1 = icmp ult <4 x i32> %x, %y
- %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
+ %a = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %x, <4 x i32> %y)
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
ret <4 x i32> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %a1 = icmp ult <8 x i16> %x, %y
- %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
+ %a = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %x, <8 x i16> %y)
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
ret <8 x i16> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %a1 = icmp ult <16 x i8> %x, %y
- %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
+ %a = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
ret <16 x i8> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %a1 = icmp ugt <4 x i32> %x, %y
- %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
+ %a = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y)
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
ret <4 x i32> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %a1 = icmp ugt <8 x i16> %x, %y
- %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
+ %a = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %x, <8 x i16> %y)
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
ret <8 x i16> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %a1 = icmp ugt <16 x i8> %x, %y
- %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
+ %a = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y)
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
ret <16 x i8> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> zeroinitializer
%b = add <4 x i32> %a, %x
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> zeroinitializer
%b = add <8 x i16> %a, %x
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> zeroinitializer
%b = add <16 x i8> %a, %x
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> zeroinitializer
%b = sub <4 x i32> %x, %a
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> zeroinitializer
%b = sub <8 x i16> %x, %a
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> zeroinitializer
%b = sub <16 x i8> %x, %a
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%b = mul <4 x i32> %a, %x
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%b = mul <8 x i16> %a, %x
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%b = mul <16 x i8> %a, %x
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x float> undef, float %y, i32 0
+ %i = insertelement <4 x float> undef, float %y, i64 0
%ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%a = select <4 x i1> %c, <4 x float> %ys, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
%b = fadd <4 x float> %a, %x
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x half> undef, half %y, i32 0
+ %i = insertelement <8 x half> undef, half %y, i64 0
%ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%a = select <8 x i1> %c, <8 x half> %ys, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>
%b = fadd <8 x half> %a, %x
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x float> undef, float %y, i32 0
+ %i = insertelement <4 x float> undef, float %y, i64 0
%ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%a = select <4 x i1> %c, <4 x float> %ys, <4 x float> zeroinitializer
%b = fsub <4 x float> %x, %a
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x half> undef, half %y, i32 0
+ %i = insertelement <8 x half> undef, half %y, i64 0
%ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%a = select <8 x i1> %c, <8 x half> %ys, <8 x half> zeroinitializer
%b = fsub <8 x half> %x, %a
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x float> undef, float %y, i32 0
+ %i = insertelement <4 x float> undef, float %y, i64 0
%ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%a = select <4 x i1> %c, <4 x float> %ys, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
%b = fmul <4 x float> %a, %x
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x half> undef, half %y, i32 0
+ %i = insertelement <8 x half> undef, half %y, i64 0
%ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%a = select <8 x i1> %c, <8 x half> %ys, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
%b = fmul <8 x half> %a, %x
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
ret <16 x i8> %b
}
+define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
+; CHECK-LABEL: shl_v4i32_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u32 q0, q0, q1
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; Shift stays unpredicated; only the merge into %y is under the VCTP predicate.
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = shl <4 x i32> %x, %y
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
+ ret <4 x i32> %b
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
+; CHECK-LABEL: shl_v8i16_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u16 q0, q0, q1
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; 16-bit variant: unpredicated vshl, predicated vmovt merge with %y.
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = shl <8 x i16> %x, %y
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
+ ret <8 x i16> %b
+}
+
+define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
+; CHECK-LABEL: shl_v16i8_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshl.u8 q0, q0, q1
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; 8-bit variant: unpredicated vshl, predicated vmovt merge with %y.
+ %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
+ %a = shl <16 x i8> %x, %y
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
+ ret <16 x i8> %b
+}
+
+define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
+; CHECK-LABEL: ashr_v4i32_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s32 q2, q1
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vshl.s32 q0, q0, q2
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; ashr lowers as a signed vshl by the negated amount; merge is predicated.
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = ashr <4 x i32> %x, %y
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
+ ret <4 x i32> %b
+}
+
+define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
+; CHECK-LABEL: ashr_v8i16_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s16 q2, q1
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vshl.s16 q0, q0, q2
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; 16-bit ashr: vneg + signed vshl, then predicated vmovt merge.
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = ashr <8 x i16> %x, %y
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
+ ret <8 x i16> %b
+}
+
+define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
+; CHECK-LABEL: ashr_v16i8_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s8 q2, q1
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vshl.s8 q0, q0, q2
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; 8-bit ashr: vneg + signed vshl, then predicated vmovt merge.
+ %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
+ %a = ashr <16 x i8> %x, %y
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
+ ret <16 x i8> %b
+}
+
+define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
+; CHECK-LABEL: lshr_v4i32_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s32 q2, q1
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vshl.u32 q0, q0, q2
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; lshr lowers as an unsigned vshl by the negated amount; merge is predicated.
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = lshr <4 x i32> %x, %y
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
+ ret <4 x i32> %b
+}
+
+define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
+; CHECK-LABEL: lshr_v8i16_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s16 q2, q1
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vshl.u16 q0, q0, q2
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; 16-bit lshr: vneg + unsigned vshl, then predicated vmovt merge.
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = lshr <8 x i16> %x, %y
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
+ ret <8 x i16> %b
+}
+
+define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
+; CHECK-LABEL: lshr_v16i8_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vneg.s8 q2, q1
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vshl.u8 q0, q0, q2
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; 8-bit lshr: vneg + unsigned vshl, then predicated vmovt merge.
+ %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
+ %a = lshr <16 x i8> %x, %y
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
+ ret <16 x i8> %b
+}
+
define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: andnot_v4i32_y:
; CHECK: @ %bb.0: @ %entry
ret <8 x half> %b
}
+define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
+; CHECK-LABEL: fdiv_v4f32_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vdiv.f32 s3, s3, s7
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vdiv.f32 s2, s2, s6
+; CHECK-NEXT: vdiv.f32 s1, s1, s5
+; CHECK-NEXT: vdiv.f32 s0, s0, s4
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; No vector fdiv in MVE: scalarized per-lane vdiv.f32, then predicated merge.
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = fdiv <4 x float> %x, %y
+ %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
+ ret <4 x float> %b
+}
+
+define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
+; CHECK-LABEL: fdiv_v8f16_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmovx.f16 s10, s0
+; CHECK-NEXT: vmovx.f16 s8, s4
+; CHECK-NEXT: vdiv.f16 s8, s10, s8
+; CHECK-NEXT: vdiv.f16 s0, s0, s4
+; CHECK-NEXT: vins.f16 s0, s8
+; CHECK-NEXT: vmovx.f16 s10, s1
+; CHECK-NEXT: vmovx.f16 s8, s5
+; CHECK-NEXT: vdiv.f16 s1, s1, s5
+; CHECK-NEXT: vdiv.f16 s8, s10, s8
+; CHECK-NEXT: vmovx.f16 s10, s2
+; CHECK-NEXT: vins.f16 s1, s8
+; CHECK-NEXT: vmovx.f16 s8, s6
+; CHECK-NEXT: vdiv.f16 s8, s10, s8
+; CHECK-NEXT: vdiv.f16 s2, s2, s6
+; CHECK-NEXT: vins.f16 s2, s8
+; CHECK-NEXT: vmovx.f16 s10, s3
+; CHECK-NEXT: vmovx.f16 s8, s7
+; CHECK-NEXT: vdiv.f16 s3, s3, s7
+; CHECK-NEXT: vdiv.f16 s8, s10, s8
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vins.f16 s3, s8
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; f16 fdiv is scalarized: lanes extracted with vmovx, divided, reinserted
+ ; with vins; only the final merge into %y is predicated.
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = fdiv <8 x half> %x, %y
+ %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
+ ret <8 x half> %b
+}
+
+define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
+; CHECK-LABEL: fmai_v4f32_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vfma.f32 q0, q1, q2
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; llvm.fma maps to an unpredicated vfma; select keeps %y in inactive lanes.
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %a = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %z, <4 x float> %x)
+ %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
+ ret <4 x float> %b
+}
+
+define arm_aapcs_vfpcc <8 x half> @fmai_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
+; CHECK-LABEL: fmai_v8f16_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vfma.f16 q0, q1, q2
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; f16 variant of fmai_v4f32_y: unpredicated vfma.f16 + predicated merge.
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
+ %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
+ ret <8 x half> %b
+}
+
+define arm_aapcs_vfpcc <4 x float> @fma_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
+; CHECK-LABEL: fma_v4f32_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vfma.f32 q0, q1, q2
+; CHECK-NEXT: vctp.32 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; fast-math fmul+fadd contracts to a single vfma; merge is predicated.
+ %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
+ %m = fmul fast <4 x float> %y, %z
+ %a = fadd fast <4 x float> %m, %x
+ %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
+ ret <4 x float> %b
+}
+
+define arm_aapcs_vfpcc <8 x half> @fma_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
+; CHECK-LABEL: fma_v8f16_y:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vfma.f16 q0, q1, q2
+; CHECK-NEXT: vctp.16 r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ ; f16 fast-math fmul+fadd contracts to vfma.f16; merge is predicated.
+ %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
+ %m = fmul fast <8 x half> %y, %z
+ %a = fadd fast <8 x half> %m, %x
+ %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
+ ret <8 x half> %b
+}
+
define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vcmpt.s32 gt, q1, q0
-; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmint.s32 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %a1 = icmp slt <4 x i32> %x, %y
- %0 = and <4 x i1> %c, %a1
- %b = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
+ ; icmp+select min pattern rewritten as the smin intrinsic, which lowers to a
+ ; single predicated vmint instead of vcmpt+vmovt.
+ %a = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> %y)
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
 ret <4 x i32> %b
}
; CHECK-LABEL: icmp_slt_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vcmpt.s16 gt, q1, q0
-; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmint.s16 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %a1 = icmp slt <8 x i16> %x, %y
- %0 = and <8 x i1> %c, %a1
- %b = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
+ %a = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %x, <8 x i16> %y)
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
ret <8 x i16> %b
}
; CHECK-LABEL: icmp_slt_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vcmpt.s8 gt, q1, q0
-; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmint.s8 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %a1 = icmp slt <16 x i8> %x, %y
- %0 = and <16 x i1> %c, %a1
- %b = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
+ %a = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y)
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
ret <16 x i8> %b
}
; CHECK-LABEL: icmp_sgt_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vcmpt.s32 gt, q0, q1
-; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmaxt.s32 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %a1 = icmp sgt <4 x i32> %x, %y
- %0 = and <4 x i1> %c, %a1
- %b = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
+ %a = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> %y)
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
ret <4 x i32> %b
}
; CHECK-LABEL: icmp_sgt_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vcmpt.s16 gt, q0, q1
-; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmaxt.s16 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %a1 = icmp sgt <8 x i16> %x, %y
- %0 = and <8 x i1> %c, %a1
- %b = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
+ %a = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %x, <8 x i16> %y)
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
ret <8 x i16> %b
}
; CHECK-LABEL: icmp_sgt_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vcmpt.s8 gt, q0, q1
-; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmaxt.s8 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %a1 = icmp sgt <16 x i8> %x, %y
- %0 = and <16 x i1> %c, %a1
- %b = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
+ %a = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
ret <16 x i8> %b
}
; CHECK-LABEL: icmp_ult_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vcmpt.u32 hi, q1, q0
-; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmint.u32 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %a1 = icmp ult <4 x i32> %x, %y
- %0 = and <4 x i1> %c, %a1
- %b = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
+ %a = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %x, <4 x i32> %y)
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
ret <4 x i32> %b
}
; CHECK-LABEL: icmp_ult_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vcmpt.u16 hi, q1, q0
-; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmint.u16 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %a1 = icmp ult <8 x i16> %x, %y
- %0 = and <8 x i1> %c, %a1
- %b = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
+ %a = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %x, <8 x i16> %y)
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
ret <8 x i16> %b
}
; CHECK-LABEL: icmp_ult_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vcmpt.u8 hi, q1, q0
-; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmint.u8 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %a1 = icmp ult <16 x i8> %x, %y
- %0 = and <16 x i1> %c, %a1
- %b = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
+ %a = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
ret <16 x i8> %b
}
; CHECK-LABEL: icmp_ugt_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.32 r0
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vcmpt.u32 hi, q0, q1
-; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmaxt.u32 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %a1 = icmp ugt <4 x i32> %x, %y
- %0 = and <4 x i1> %c, %a1
- %b = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
+ %a = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y)
+ %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
ret <4 x i32> %b
}
; CHECK-LABEL: icmp_ugt_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.16 r0
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vcmpt.u16 hi, q0, q1
-; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmaxt.u16 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %a1 = icmp ugt <8 x i16> %x, %y
- %0 = and <8 x i1> %c, %a1
- %b = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
+ %a = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %x, <8 x i16> %y)
+ %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
ret <8 x i16> %b
}
; CHECK-LABEL: icmp_ugt_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vctp.8 r0
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vcmpt.u8 hi, q0, q1
-; CHECK-NEXT: vmovt q1, q0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmaxt.u8 q1, q0, q1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %a1 = icmp ugt <16 x i8> %x, %y
- %0 = and <16 x i1> %c, %a1
- %b = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
+ %a = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y)
+ %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
ret <16 x i8> %b
}
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
%b = add <4 x i32> %ys, %a
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
%b = add <8 x i16> %ys, %a
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
%b = add <16 x i8> %ys, %a
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = sub <4 x i32> %x, %ys
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = sub <8 x i16> %x, %ys
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = sub <16 x i8> %x, %ys
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%b = mul <4 x i32> %ys, %a
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%b = mul <8 x i16> %ys, %a
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%b = mul <16 x i8> %ys, %a
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x float> undef, float %y, i32 0
+ %i = insertelement <4 x float> undef, float %y, i64 0
%ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%a = select <4 x i1> %c, <4 x float> %x, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
%b = fadd <4 x float> %ys, %a
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x half> undef, half %y, i32 0
+ %i = insertelement <8 x half> undef, half %y, i64 0
%ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%a = select <8 x i1> %c, <8 x half> %x, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>
%b = fadd <8 x half> %ys, %a
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x float> undef, float %y, i32 0
+ %i = insertelement <4 x float> undef, float %y, i64 0
%ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%a = fsub <4 x float> %x, %ys
%b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x half> undef, half %y, i32 0
+ %i = insertelement <8 x half> undef, half %y, i64 0
%ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%a = fsub <8 x half> %x, %ys
%b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x float> undef, float %y, i32 0
+ %i = insertelement <4 x float> undef, float %y, i64 0
%ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
%a = select <4 x i1> %c, <4 x float> %x, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
%b = fmul <4 x float> %ys, %a
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x half> undef, half %y, i32 0
+ %i = insertelement <8 x half> undef, half %y, i64 0
%ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
%a = select <8 x i1> %c, <8 x half> %x, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
%b = fmul <8 x half> %ys, %a
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
- %i = insertelement <4 x i32> undef, i32 %y, i32 0
+ %i = insertelement <4 x i32> undef, i32 %y, i64 0
%ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
%a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
%b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
- %i = insertelement <8 x i16> undef, i16 %y, i32 0
+ %i = insertelement <8 x i16> undef, i16 %y, i64 0
%ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
%a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
%b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
; CHECK-NEXT: bx lr
entry:
%c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
- %i = insertelement <16 x i8> undef, i8 %y, i32 0
+ %i = insertelement <16 x i8> undef, i8 %y, i64 0
%ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
%a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
%b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
+
+declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
+
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)