let params = T.Int in {
def vmvnq: Intrinsic<Vector, (args Vector:$a),
(xor $a, (uint_max Vector))>;
+ defm vmvnq: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<"mvn_predicated", [Vector, Predicate]> $a, $pred, $inactive)>;
def vclzq: Intrinsic<Vector, (args Vector:$a),
(IRIntBase<"ctlz", [Vector]> $a, (i1 0))>;
+ defm vclzq: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<"clz_predicated", [Vector, Predicate]> $a, $pred, $inactive)>;
}
let params = T.Signed in {
def vclsq: Intrinsic<Vector, (args Vector:$a), (IRInt<"vcls", [Vector]> $a)>;
+ defm vclsq: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<"cls_predicated", [Vector, Predicate]> $a, $pred, $inactive)>;
+
def vnegq: Intrinsic<Vector, (args Vector:$a),
(sub (zeroinit Vector), $a)>;
def vabsq: Intrinsic<Vector, (args Vector:$a),
    (select (icmp_sgt $a, (zeroinit Vector)), $a,
            (sub (zeroinit Vector), $a))>;
def vqnegq: Intrinsic<Vector, (args Vector:$a),
    (select (icmp_eq $a, (int_min Vector)),
            (int_max Vector),
            (sub (zeroinit Vector), $a))>;
+
+ foreach name = ["qneg", "qabs"] in {
+ defm v#name#q: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<name#"_predicated", [Vector, Predicate]> $a, $pred, $inactive),
+ 0 /* no _x variant for saturating intrinsics */>;
+ }
+}
+let params = !listconcat(T.Signed, T.Float) in {
+ foreach name = ["neg", "abs"] in {
+ defm v#name#q: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<name#"_predicated", [Vector, Predicate]> $a, $pred, $inactive)>;
+ }
}
let params = T.Float in {
def vnegq_f: Intrinsic<Vector, (args Vector:$a), (fneg $a)>,
             NameOverride<"vnegq">;
defvar FVector = VecOf<FScalar>;
defvar IVector = VecOf<IScalar>;
- let params = [IScalar], pnt = PNT_2Type in
- def : Intrinsic<FVector, (args IVector:$a), (itof $a, FVector)>,
- NameOverride<"vcvtq_" # FScalar>;
- let params = [FScalar], pnt = PNT_None in
- def : Intrinsic<IVector, (args FVector:$a), (ftoi $a, IVector)>,
- NameOverride<"vcvtq_" # IScalar>;
+ let params = [IScalar] in {
+ let pnt = PNT_2Type in {
+ def : Intrinsic<FVector, (args IVector:$a), (itof $a, FVector)>,
+ NameOverride<"vcvtq_" # FScalar>;
+ }
+ defm vcvtq: IntrinsicMX<FVector, (args IVector:$a, Predicate:$pred),
+ (IRInt<"vcvt_fp_int_predicated", [FVector, IVector, Predicate]>
+ $a, (unsignedflag IScalar), $pred, $inactive),
+ 1, "_" # FScalar, PNT_2Type, PNT_2Type>;
+ }
+ let params = [FScalar] in {
+ let pnt = PNT_None in {
+ def : Intrinsic<IVector, (args FVector:$a), (ftoi $a, IVector)>,
+ NameOverride<"vcvtq_" # IScalar>;
+ }
+ defm vcvtq: IntrinsicMX<IVector, (args FVector:$a, Predicate:$pred),
+ (IRInt<"vcvt_fp_int_predicated", [IVector, FVector, Predicate]>
+ $a, (unsignedflag IScalar), $pred, $inactive),
+ 1, "_" # IScalar, PNT_2Type, PNT_None>;
+ }
}
-defm : float_int_conversions<f32, u32, fptoui, uitofp>;
-defm : float_int_conversions<f16, u16, fptoui, uitofp>;
-defm : float_int_conversions<f32, s32, fptosi, sitofp>;
-defm : float_int_conversions<f16, s16, fptosi, sitofp>;
+defm "" : float_int_conversions<f32, u32, fptoui, uitofp>;
+defm "" : float_int_conversions<f16, u16, fptoui, uitofp>;
+defm "" : float_int_conversions<f32, s32, fptosi, sitofp>;
+defm "" : float_int_conversions<f16, s16, fptosi, sitofp>;
-let params = [s8, u8, s16, u16] in {
- def vmovlbq: Intrinsic<DblVector, (args Vector:$a),
- (extend (unzip $a, 0), DblVector, (unsignedflag Scalar))>;
- def vmovltq: Intrinsic<DblVector, (args Vector:$a),
- (extend (unzip $a, 1), DblVector, (unsignedflag Scalar))>;
+multiclass vmovl<bit top> {
+ let params = [s8, u8, s16, u16] in {
+ def "": Intrinsic<DblVector, (args Vector:$a),
+ (extend (unzip $a, top), DblVector, (unsignedflag Scalar))>;
+ defm "": IntrinsicMX<DblVector, (args Vector:$a, DblPredicate:$pred),
+ (IRInt<"vmovl_predicated", [DblVector, Vector, DblPredicate]>
+ $a, (unsignedflag Scalar), top, $pred, $inactive)>;
+ }
}
-let params = [s16, u16, s32, u32] in {
- def vmovnbq: Intrinsic<HalfVector, (args HalfVector:$inactive, Vector:$a),
- (trunc (zip $a, (vreinterpret (vrev $inactive, (bitsize Scalar)), Vector)),
- HalfVector)>;
- def vmovntq: Intrinsic<HalfVector, (args HalfVector:$inactive, Vector:$a),
- (trunc (zip (vreinterpret $inactive, Vector), $a), HalfVector)>;
+defm vmovlbq: vmovl<0>;
+defm vmovltq: vmovl<1>;
+
+multiclass vmovn<bit top, dag wide_result> {
+ let params = [s16, u16, s32, u32] in {
+ def "": Intrinsic<HalfVector, (args HalfVector:$inactive, Vector:$a),
+ (trunc wide_result, HalfVector)>;
+ def _m: Intrinsic<HalfVector, (args HalfVector:$inactive, Vector:$a,
+ Predicate:$pred),
+ (IRInt<"vmovn_predicated", [HalfVector, Vector, Predicate]>
+ $inactive, $a, top, $pred)>;
+ }
}
-let params = T.Float in {
- def vrndq: Intrinsic<Vector, (args Vector:$a),
- (IRIntBase<"trunc", [Vector]> $a)>;
- def vrndmq: Intrinsic<Vector, (args Vector:$a),
- (IRIntBase<"floor", [Vector]> $a)>;
- def vrndpq: Intrinsic<Vector, (args Vector:$a),
- (IRIntBase<"ceil", [Vector]> $a)>;
- def vrndaq: Intrinsic<Vector, (args Vector:$a),
- (IRIntBase<"round", [Vector]> $a)>;
- def vrndxq: Intrinsic<Vector, (args Vector:$a),
- (IRIntBase<"rint", [Vector]> $a)>;
- def vrndnq: Intrinsic<Vector, (args Vector:$a),
- (IRInt<"vrintn", [Vector]> $a)>;
+defm vmovntq: vmovn<1, (zip (vreinterpret $inactive, Vector), $a)>;
+defm vmovnbq: vmovn<0,
+ (zip $a, (vreinterpret (vrev $inactive, (bitsize Scalar)), Vector))>;
+
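For reference, here is a hand-written sketch of how a predicated narrowing move should lower, derived from the vmovn multiclass above. Because the unpredicated vmovnbq/vmovntq already take an $inactive argument, only a _m form exists (no _x variant). The test function name is hypothetical and the exact temporaries may differ from generated output:

// CHECK-LABEL: @test_vmovnbq_m_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
//
int8x16_t test_vmovnbq_m_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
{
    return vmovnbq_m_s16(a, b, p);
}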
+multiclass vrnd<IRIntBase ir_int, string suffix> {
+ let params = T.Float in {
+ def "": Intrinsic<Vector, (args Vector:$a), (ir_int $a)>;
+ defm "": IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<"vrint"#suffix#"_predicated", [Vector, Predicate]>
+ $a, $pred, $inactive)>;
+ }
}
+defm vrndq: vrnd<IRIntBase<"trunc", [Vector]>, "z">;
+defm vrndmq: vrnd<IRIntBase<"floor", [Vector]>, "m">;
+defm vrndpq: vrnd<IRIntBase<"ceil", [Vector]>, "p">;
+defm vrndaq: vrnd<IRIntBase<"round", [Vector]>, "a">;
+defm vrndxq: vrnd<IRIntBase<"rint", [Vector]>, "x">;
+defm vrndnq: vrnd<IRInt<"vrintn", [Vector]>, "n">;
+
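As a sanity check on the vrnd multiclass, a hand-written sketch of the expected lowering for vrndmq_m_f32 follows; the intrinsic name is derived from IRInt<"vrint"#suffix#"_predicated"> with suffix "m", and the test function itself is hypothetical, not generated output:

// CHECK-LABEL: @test_vrndmq_m_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
// CHECK-NEXT:    ret <4 x float> [[TMP2]]
//
float32x4_t test_vrndmq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
{
    return vrndmq_m_f32(inactive, a, p);
}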
multiclass compare_with_pred<string condname, dag arguments,
dag cmp, string suffix> {
// Make the predicated and unpredicated versions of a single comparison.
defm vrmlsldavh : MVEBinaryVectorHoriz64R<V.True, V.True, "x">;
}
-let params = T.All8 in
-def vrev16q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 16)>;
-let params = !listconcat(T.All8, T.All16) in
-def vrev32q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 32)>;
-let params = T.Usual in
-def vrev64q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 64)>;
+multiclass vrev_predicated<int revsize> {
+ defm "" : IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<"vrev_predicated", [Vector, Predicate]>
+ $a, revsize, $pred, $inactive)>;
+}
+
+let params = T.All8 in {
+ def vrev16q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 16)>;
+ defm vrev16q: vrev_predicated<16>;
+}
+let params = !listconcat(T.All8, T.All16) in {
+ def vrev32q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 32)>;
+ defm vrev32q: vrev_predicated<32>;
+}
+let params = T.Usual in {
+ def vrev64q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 64)>;
+ defm vrev64q: vrev_predicated<64>;
+}
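A matching sketch for the predicated vrev forms, derived from vrev_predicated above with revsize folded in as an immediate (hypothetical test function, written in the same style as the generated tests):

// CHECK-LABEL: @test_vrev16q_m_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
//
int8x16_t test_vrev16q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
{
    return vrev16q_m_s8(inactive, a, p);
}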
foreach desttype = T.All in {
// We want a vreinterpretq between every pair of supported vector types
  // The _m variant takes an initial parameter called $inactive, which
  // provides the input value of the output register, i.e. all the
  // inactive lanes in the predicated operation take their values from
  // this.
- def "_m" # nameSuffix:
- Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg> {
+ def : Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg>,
+ NameOverride<NAME # "_m" # nameSuffix> {
let pnt = pnt_m;
}
// The _x variant leaves off that parameter, and simply uses an
// undef value of the same type.
- def "_x" # nameSuffix:
- Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)> {
+ def : Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)>,
+ NameOverride<NAME # "_x" # nameSuffix> {
let pnt = pnt_x;
}
}
#endif /* POLYMORPHIC */
}
+// CHECK-LABEL: @test_vmvnq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vmvnq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmvnq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmvnq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmvnq_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmvnq_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmvnq_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_m_u32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vmvnq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmvnq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmvnq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmvnq_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmvnq_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmvnq_x_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_x_u32(a, p);
+#endif /* POLYMORPHIC */
+}
+
// CHECK-LABEL: @test_vnegq_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = fneg <8 x half> [[A:%.*]]
return vqnegq_s32(a);
#endif /* POLYMORPHIC */
}
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vnegq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vnegq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vnegq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vnegq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vnegq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vnegq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vnegq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vnegq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vnegq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vnegq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vnegq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vnegq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+ return vnegq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vnegq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+ return vnegq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vnegq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+ return vnegq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vnegq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+ return vnegq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vnegq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+ return vnegq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vabsq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vabsq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vabsq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vabsq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vabsq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vabsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vabsq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vabsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vabsq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vabsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vabsq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vabsq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vabsq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vabsq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vabsq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vabsq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vabsq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vabsq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vabsq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vabsq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vabsq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vqnegq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqnegq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vqnegq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqnegq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqnegq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vqnegq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqnegq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vqnegq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vqnegq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vqabsq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqabsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vqabsq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqabsq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqabsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vqabsq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqabsq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vqabsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vqabsq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
#endif /* POLYMORPHIC */
}
+// CHECK-LABEL: @test_vclsq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vclsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclsq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vclsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclsq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vclsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclsq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vclzq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclzq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vclzq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclzq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vclzq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclzq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vclzq_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclzq_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vclzq_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclzq_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vclzq_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclzq_m_u32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vclsq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclsq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vclsq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclsq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_x_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vclsq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclsq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vclzq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclzq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vclzq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclzq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vclzq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclzq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vclzq_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclzq_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vclzq_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclzq_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vclzq_x_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclzq_x_u32(a, p);
+#endif /* POLYMORPHIC */
+}
return vcvtq_u32_f32(a);
}
+// CHECK-LABEL: @test_vcvtq_m_f16_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvtq_m_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_f16_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_f16_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvtq_m_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_f16_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_f32_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtq_m_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_f32_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_f32_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtq_m_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_f32_u32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_s16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vcvtq_m_s16_f16(int16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_s16_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_s32_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vcvtq_m_s32_f32(int32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_s32_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_u16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vcvtq_m_u16_f16(uint16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_u16_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_u32_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vcvtq_m_u32_f32(uint32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_u32_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_f16_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvtq_x_f16_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_x(a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_x_f16_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_f16_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvtq_x_f16_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_x(a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_x_f16_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_f32_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtq_x_f32_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_x(a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_x_f32_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_f32_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtq_x_f32_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_x(a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_x_f32_u32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_s16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vcvtq_x_s16_f16(float16x8_t a, mve_pred16_t p)
+{
+ return vcvtq_x_s16_f16(a, p);
+}
+
+// CHECK-LABEL: @test_vcvtq_x_s32_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vcvtq_x_s32_f32(float32x4_t a, mve_pred16_t p)
+{
+ return vcvtq_x_s32_f32(a, p);
+}
+
+// CHECK-LABEL: @test_vcvtq_x_u16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vcvtq_x_u16_f16(float16x8_t a, mve_pred16_t p)
+{
+ return vcvtq_x_u16_f16(a, p);
+}
+
+// CHECK-LABEL: @test_vcvtq_x_u32_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vcvtq_x_u32_f32(float32x4_t a, mve_pred16_t p)
+{
+ return vcvtq_x_u32_f32(a, p);
+}
+
// CHECK-LABEL: @test_vcvttq_f16_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1)
#endif /* POLYMORPHIC */
}
+// CHECK-LABEL: @test_vmovlbq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovlbq_m_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmovlbq_m_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovlbq_m_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmovlbq_m_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovltq_m_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmovltq_m_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovltq_m_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmovltq_m_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
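+// The _x ("don't care") variants pass undef as the inactive operand, leaving
+// the false-predicated lanes of the result unspecified.
+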
+// CHECK-LABEL: @test_vmovlbq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovlbq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmovlbq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovlbq_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_x_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmovlbq_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovltq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmovltq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovltq_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_x_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmovltq_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
return vmovntq_u32(a, b);
#endif /* POLYMORPHIC */
}
+
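+// The predicated vmovn intrinsics below generate identical IR regardless of
+// endianness, so the LE and BE check lines match; the separate prefixes are
+// presumably for other tests in this file whose unpredicated lowering differs
+// by endianness.
+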
+// LE-LABEL: @test_vmovnbq_m_s16(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
+// LE-NEXT: ret <16 x i8> [[TMP2]]
+//
+// BE-LABEL: @test_vmovnbq_m_s16(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
+// BE-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vmovnbq_m_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovnbq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovnbq_m_s16(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovnbq_m_s32(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// LE-NEXT: ret <8 x i16> [[TMP2]]
+//
+// BE-LABEL: @test_vmovnbq_m_s32(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// BE-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovnbq_m_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovnbq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovnbq_m_s32(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovnbq_m_u16(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
+// LE-NEXT: ret <16 x i8> [[TMP2]]
+//
+// BE-LABEL: @test_vmovnbq_m_u16(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
+// BE-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmovnbq_m_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovnbq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovnbq_m_u16(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovnbq_m_u32(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// LE-NEXT: ret <8 x i16> [[TMP2]]
+//
+// BE-LABEL: @test_vmovnbq_m_u32(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// BE-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovnbq_m_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovnbq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovnbq_m_u32(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovntq_m_s16(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// LE-NEXT: ret <16 x i8> [[TMP2]]
+//
+// BE-LABEL: @test_vmovntq_m_s16(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// BE-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vmovntq_m_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovntq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovntq_m_s16(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovntq_m_s32(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// LE-NEXT: ret <8 x i16> [[TMP2]]
+//
+// BE-LABEL: @test_vmovntq_m_s32(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// BE-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovntq_m_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovntq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovntq_m_s32(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovntq_m_u16(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// LE-NEXT: ret <16 x i8> [[TMP2]]
+//
+// BE-LABEL: @test_vmovntq_m_u16(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// BE-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmovntq_m_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovntq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovntq_m_u16(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovntq_m_u32(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// LE-NEXT: ret <8 x i16> [[TMP2]]
+//
+// BE-LABEL: @test_vmovntq_m_u32(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// BE-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovntq_m_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovntq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovntq_m_u32(a, b, p);
+#endif /* POLYMORPHIC */
+}
return vrev64q_u32(a);
#endif /* POLYMORPHIC */
}
+
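+// The constant i32 operand of vrev.predicated (16, 32 or 64 below) gives the
+// width in bits of the element groups being reversed.
+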
+// CHECK-LABEL: @test_vrev16q_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev16q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev16q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev16q_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev16q_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev16q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev16q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev16q_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrev32q_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev32q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrev32q_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev32q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrev32q_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrev64q_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrev64q_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev64q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrev64q_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vrev64q_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev64q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrev64q_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vrev64q_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_u32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev16q_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev16q_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev16q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev16q_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev16q_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev16q_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev16q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev16q_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrev32q_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev32q_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrev32q_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev32q_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrev32q_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrev64q_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrev64q_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev64q_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrev64q_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vrev64q_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev64q_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrev64q_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vrev64q_x_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_u32(a, p);
+#endif /* POLYMORPHIC */
+}
return vrndnq_f32(a);
#endif /* POLYMORPHIC */
}
+
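+// The vrnd* builtins map onto the vrint* predicated IR intrinsics: the name
+// suffix selects the rounding mode (a/m/n/p/x), and plain vrndq rounds
+// toward zero via vrintz.
+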
+// CHECK-LABEL: @test_vrndaq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndaq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndaq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndaq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndaq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndaq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndaq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndaq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndmq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndmq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndmq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndmq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndmq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndmq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndnq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndnq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndnq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndnq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndpq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndpq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndpq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndpq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndpq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndpq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndxq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndxq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndxq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndxq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndxq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndxq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndaq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndaq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndaq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndaq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndaq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndaq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndaq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndaq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndmq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndmq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndmq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndmq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndmq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndmq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndnq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndnq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndnq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndnq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndpq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndpq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndpq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndpq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndpq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndpq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndxq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndxq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndxq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndxq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndxq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndxq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
[llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */],
LLVMMatchType<0>, llvm_anyvector_ty>;
+def int_arm_mve_vcvt_fp_int_predicated: Intrinsic<
+ [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */,
+ llvm_anyvector_ty /* predicate */, LLVMMatchType<0> /* inactive */],
+ [IntrNoMem]>;
+
def int_arm_mve_vrintn: Intrinsic<
[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
def int_arm_mve_vcls: Intrinsic<
LLVMMatchType<0>],
[IntrNoMem]>;
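+
+// Predicated forms of the simple unary vector operations: the operands are
+// the input vector, the predicate, and the 'inactive' vector that supplies
+// the result lanes where the predicate is false.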
+class MVESimpleUnaryPredicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_arm_mve_mvn_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_abs_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_neg_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_qabs_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_qneg_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_clz_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_cls_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintz_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintm_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintp_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrinta_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintx_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintn_predicated: MVESimpleUnaryPredicated;
+
+def int_arm_mve_vrev_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_i32_ty /* size to reverse */,
+ llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_arm_mve_vmovl_predicated: Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, llvm_i32_ty /* unsigned */, llvm_i32_ty /* top half */,
+ llvm_anyvector_ty /* predicate */, LLVMMatchType<0>], [IntrNoMem]>;
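+// vmovn writes only alternate lanes of its output, so rather than taking a
+// separate 'inactive' operand it takes the previous value of the whole
+// output vector as its first argument.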
+def int_arm_mve_vmovn_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i32_ty /* top half */,
+ llvm_anyvector_ty /* predicate */], [IntrNoMem]>;
+
} // end TargetPrefix
(v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>;
}
-let Predicates = [HasMVEInt] in {
- def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
- (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
- def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
- (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
- def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
- (v16i8 (MVE_VREV64_8 (v16i8 MQPR:$src)))>;
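+// For each (reversal width, vector type) pair, emit two selection patterns:
+// one for the plain ARMvrev* node, and one selecting the predicated
+// intrinsic into the same instruction with an ARMVCCThen predicate and an
+// inactive value.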
+multiclass MVE_VREV_basic_patterns<int revbits, list<MVEVectorVTInfo> VTIs,
+ Instruction Inst> {
+ defvar unpred_op = !cast<SDNode>("ARMvrev" # revbits);
+
+ foreach VTI = VTIs in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$src)))>;
+ def : Pat<(VTI.Vec (int_arm_mve_vrev_predicated (VTI.Vec MQPR:$src),
+ revbits, (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$src), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
+}
- def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
- (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
- def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
- (v16i8 (MVE_VREV32_8 (v16i8 MQPR:$src)))>;
+let Predicates = [HasMVEInt] in {
+ defm: MVE_VREV_basic_patterns<64, [MVE_v4i32, MVE_v4f32], MVE_VREV64_32>;
+ defm: MVE_VREV_basic_patterns<64, [MVE_v8i16, MVE_v8f16], MVE_VREV64_16>;
+ defm: MVE_VREV_basic_patterns<64, [MVE_v16i8 ], MVE_VREV64_8>;
- def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
- (v16i8 (MVE_VREV16_8 (v16i8 MQPR:$src)))>;
+ defm: MVE_VREV_basic_patterns<32, [MVE_v8i16, MVE_v8f16], MVE_VREV32_16>;
+ defm: MVE_VREV_basic_patterns<32, [MVE_v16i8 ], MVE_VREV32_8>;
- def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))),
- (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
- def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
- (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
- def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
- (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
+ defm: MVE_VREV_basic_patterns<16, [MVE_v16i8 ], MVE_VREV16_8>;
}
def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
}
let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (vnotq (v16i8 MQPR:$val1))),
- (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>;
- def : Pat<(v8i16 (vnotq (v8i16 MQPR:$val1))),
- (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
- def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))),
- (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
- def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))),
- (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
+ foreach VTI = [ MVE_v16i8, MVE_v8i16, MVE_v4i32, MVE_v2i64 ] in {
+ def : Pat<(VTI.Vec (vnotq (VTI.Vec MQPR:$val1))),
+ (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1)))>;
+ def : Pat<(VTI.Vec (int_arm_mve_mvn_predicated (VTI.Vec MQPR:$val1),
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
}
class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
let validForTailPredication = 1;
}
-def MVE_VCLSs8 : MVE_VCLSCLZ<"vcls", "s8", 0b00, 0b0>;
-def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>;
-def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>;
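+// Define the instruction together with both of its selection patterns: one
+// for the unpredicated operation (ctlz for VCLZ, the vcls intrinsic for
+// VCLS) and one for the predicated intrinsic.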
+multiclass MVE_VCLSCLZ_p<string opname, bit opcode, MVEVectorVTInfo VTI,
+ SDNode unpred_op> {
+ def "": MVE_VCLSCLZ<"v"#opname, VTI.Suffix, VTI.Size, opcode>;
-def MVE_VCLZs8 : MVE_VCLSCLZ<"vclz", "i8", 0b00, 0b1>;
-def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
-def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_"#opname#"_predicated");
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 ( ctlz (v16i8 MQPR:$val1))),
- (v16i8 ( MVE_VCLZs8 (v16i8 MQPR:$val1)))>;
- def : Pat<(v4i32 ( ctlz (v4i32 MQPR:$val1))),
- (v4i32 ( MVE_VCLZs32 (v4i32 MQPR:$val1)))>;
- def : Pat<(v8i16 ( ctlz (v8i16 MQPR:$val1))),
- (v8i16 ( MVE_VCLZs16 (v8i16 MQPR:$val1)))>;
-
- def : Pat<(v16i8 ( int_arm_mve_vcls (v16i8 MQPR:$val1))),
- (v16i8 ( MVE_VCLSs8 (v16i8 MQPR:$val1)))>;
- def : Pat<(v4i32 ( int_arm_mve_vcls (v4i32 MQPR:$val1))),
- (v4i32 ( MVE_VCLSs32 (v4i32 MQPR:$val1)))>;
- def : Pat<(v8i16 ( int_arm_mve_vcls (v8i16 MQPR:$val1))),
- (v8i16 ( MVE_VCLSs16 (v8i16 MQPR:$val1)))>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
}
+defm MVE_VCLSs8 : MVE_VCLSCLZ_p<"cls", 0, MVE_v16s8, int_arm_mve_vcls>;
+defm MVE_VCLSs16 : MVE_VCLSCLZ_p<"cls", 0, MVE_v8s16, int_arm_mve_vcls>;
+defm MVE_VCLSs32 : MVE_VCLSCLZ_p<"cls", 0, MVE_v4s32, int_arm_mve_vcls>;
+
+defm MVE_VCLZs8 : MVE_VCLSCLZ_p<"clz", 1, MVE_v16i8, ctlz>;
+defm MVE_VCLZs16 : MVE_VCLSCLZ_p<"clz", 1, MVE_v8i16, ctlz>;
+defm MVE_VCLZs32 : MVE_VCLSCLZ_p<"clz", 1, MVE_v4i32, ctlz>;
+
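+// Shared encoding class for VABS/VNEG and the saturating VQABS/VQNEG: bits
+// 16 and 10 are where the two encodings differ, so a single class with a
+// 'saturate' flag covers all four instructions.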
class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
- list<dag> pattern=[]>
+ bit saturate, list<dag> pattern=[]>
: MVEIntSingleSrc<iname, suffix, size, pattern> {
let Inst{28} = 0b1;
let Inst{25-23} = 0b111;
let Inst{21-20} = 0b11;
- let Inst{17-16} = 0b01;
- let Inst{12-8} = 0b00011;
+ let Inst{17} = 0b0;
+ let Inst{16} = !eq(saturate, 0);
+ let Inst{12-11} = 0b00;
+ let Inst{10} = saturate;
+ let Inst{9-8} = 0b11;
let Inst{7} = negate;
let Inst{6} = 0b1;
let Inst{4} = 0b0;
let validForTailPredication = 1;
}
-def MVE_VABSs8 : MVE_VABSNEG_int<"vabs", "s8", 0b00, 0b0>;
-def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
-def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
-
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
- (v16i8 (MVE_VABSs8 $v))>;
- def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
- (v8i16 (MVE_VABSs16 $v))>;
- def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
- (v4i32 (MVE_VABSs32 $v))>;
-}
+multiclass MVE_VABSNEG_int_m<string iname, bit negate, bit saturate,
+ SDNode unpred_op, Intrinsic pred_int,
+ MVEVectorVTInfo VTI> {
+ def "" : MVE_VABSNEG_int<iname, VTI.Suffix, VTI.Size, negate, saturate>;
+ defvar Inst = !cast<Instruction>(NAME);
-def MVE_VNEGs8 : MVE_VABSNEG_int<"vneg", "s8", 0b00, 0b1>;
-def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
-def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
+ let Predicates = [HasMVEInt] in {
+ // VQABS and VQNEG have more difficult isel patterns defined elsewhere
+ if !eq(saturate, 0) then {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
+ }
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
- (v16i8 (MVE_VNEGs8 $v))>;
- def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
- (v8i16 (MVE_VNEGs16 $v))>;
- def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
- (v4i32 (MVE_VNEGs32 $v))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+ }
}
-class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
- bit negate, list<dag> pattern=[]>
- : MVEIntSingleSrc<iname, suffix, size, pattern> {
-
- let Inst{28} = 0b1;
- let Inst{25-23} = 0b111;
- let Inst{21-20} = 0b11;
- let Inst{17-16} = 0b00;
- let Inst{12-8} = 0b00111;
- let Inst{7} = negate;
- let Inst{6} = 0b1;
- let Inst{4} = 0b0;
- let Inst{0} = 0b0;
- let validForTailPredication = 1;
+foreach VTI = [ MVE_v16s8, MVE_v8s16, MVE_v4s32 ] in {
+ defm "MVE_VABS" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vabs", 0, 0, abs, int_arm_mve_abs_predicated, VTI>;
+ defm "MVE_VQABS" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vqabs", 0, 1, ?, int_arm_mve_qabs_predicated, VTI>;
+ defm "MVE_VNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vneg", 1, 0, vnegq, int_arm_mve_neg_predicated, VTI>;
+ defm "MVE_VQNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vqneg", 1, 1, ?, int_arm_mve_qneg_predicated, VTI>;
}
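+
+// For illustration: with VTI = MVE_v16s8, the first defm above produces a
+// def named MVE_VABSs8 together with patterns selecting both (abs v16i8)
+// and the int_arm_mve_abs_predicated intrinsic to it.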
-def MVE_VQABSs8 : MVE_VQABSNEG<"vqabs", "s8", 0b00, 0b0>;
-def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>;
-def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>;
-
-def MVE_VQNEGs8 : MVE_VQABSNEG<"vqneg", "s8", 0b00, 0b1>;
-def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
-def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
-
// int_min/int_max: vector containing INT_MIN/INT_MAX VTI.Size times
// zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
- dag zero_vec, MVE_VQABSNEG vqabs_instruction,
- MVE_VQABSNEG vqneg_instruction> {
+ dag zero_vec, MVE_VABSNEG_int vqabs_instruction,
+ MVE_VABSNEG_int vqneg_instruction> {
let Predicates = [HasMVEInt] in {
// The below tree can be replaced by a vqabs instruction, as it represents
// the following vectorized expression (r being the value in $reg):
let Inst{3-1} = Qm{2-0};
}
-class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
+class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U, bit top,
list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
iname, suffix, "$Qd, $Qm", vpred_r, "",
let Inst{21} = 0b1;
let Inst{20-19} = sz{1-0};
let Inst{18-16} = 0b000;
+ let Inst{12} = top;
let Inst{11-6} = 0b111101;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
}
-multiclass MVE_VMOVL_shift_half<string iname, string suffix, bits<2> sz, bit U,
- list<dag> pattern=[]> {
- def bh : MVE_VMOVL<!strconcat(iname, "b"), suffix, sz, U, pattern> {
- let Inst{12} = 0b0;
- }
- def th : MVE_VMOVL<!strconcat(iname, "t"), suffix, sz, U, pattern> {
- let Inst{12} = 0b1;
- }
+multiclass MVE_VMOVL_m<bit top, string chr, MVEVectorVTInfo OutVTI,
+ MVEVectorVTInfo InVTI> {
+ def "": MVE_VMOVL<"vmovl" # chr, InVTI.Suffix, OutVTI.Size,
+ InVTI.Unsigned, top>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ def : Pat<(OutVTI.Vec (int_arm_mve_vmovl_predicated (InVTI.Vec MQPR:$src),
+ (i32 InVTI.Unsigned), (i32 top),
+ (OutVTI.Pred VCCR:$pred),
+ (OutVTI.Vec MQPR:$inactive))),
+ (OutVTI.Vec (Inst (InVTI.Vec MQPR:$src), ARMVCCThen,
+ (OutVTI.Pred VCCR:$pred),
+ (OutVTI.Vec MQPR:$inactive)))>;
}
-defm MVE_VMOVLs8 : MVE_VMOVL_shift_half<"vmovl", "s8", 0b01, 0b0>;
-defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>;
-defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
-defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
+defm MVE_VMOVLs8bh : MVE_VMOVL_m<0, "b", MVE_v8s16, MVE_v16s8>;
+defm MVE_VMOVLs8th : MVE_VMOVL_m<1, "t", MVE_v8s16, MVE_v16s8>;
+defm MVE_VMOVLu8bh : MVE_VMOVL_m<0, "b", MVE_v8u16, MVE_v16u8>;
+defm MVE_VMOVLu8th : MVE_VMOVL_m<1, "t", MVE_v8u16, MVE_v16u8>;
+defm MVE_VMOVLs16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8s16>;
+defm MVE_VMOVLs16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8s16>;
+defm MVE_VMOVLu16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8u16>;
+defm MVE_VMOVLu16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8u16>;
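+
+// As a worked example (cf. the accompanying llc tests), the predicated
+// intrinsic call
+//   call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(
+//            <16 x i8> %a, i32 0 /*unsigned*/, i32 0 /*top*/,
+//            <8 x i1> %p, <8 x i16> %inactive)
+// selects to "vmovlbt.s8" via MVE_VMOVLs8bh.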
let Predicates = [HasMVEInt] in {
def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
}
-multiclass MVE_VRINT_ops<string suffix, bits<2> size, list<dag> pattern=[]> {
- def N : MVE_VRINT<"n", 0b000, suffix, size, pattern>;
- def X : MVE_VRINT<"x", 0b001, suffix, size, pattern>;
- def A : MVE_VRINT<"a", 0b010, suffix, size, pattern>;
- def Z : MVE_VRINT<"z", 0b011, suffix, size, pattern>;
- def M : MVE_VRINT<"m", 0b101, suffix, size, pattern>;
- def P : MVE_VRINT<"p", 0b111, suffix, size, pattern>;
-}
+multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
+ SDNode unpred_op> {
+ def "": MVE_VRINT<suffix, opcode, VTI.Suffix, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_vrint"#suffix#"_predicated");
-defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>;
-defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>;
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
+}
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4f32 (frint (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (frint (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (fround (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (fround (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (ffloor (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (ffloor (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (fceil (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (int_arm_mve_vrintn (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32N (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (int_arm_mve_vrintn (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16N (v8f16 MQPR:$val1)))>;
+multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {
+ defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>;
+ defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
+ defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
+ defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;
+ defm M : MVE_VRINT_m<VTI, "m", 0b101, ffloor>;
+ defm P : MVE_VRINT_m<VTI, "p", 0b111, fceil>;
}
+defm MVE_VRINTf16 : MVE_VRINT_ops<MVE_v8f16>;
+defm MVE_VRINTf32 : MVE_VRINT_ops<MVE_v4f32>;
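+// For example, MVE_VRINTf16A here is the VRINTA encoding for v8f16; its
+// predicated pattern uses int_arm_mve_vrinta_predicated, which MVE_VRINT_m
+// looks up by name via !cast.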
+
class MVEFloatArithNeon<string iname, string suffix, bit size,
dag oops, dag iops, string ops,
vpred_ops vpred, string cstr, list<dag> pattern=[]>
defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_multi<"s32.f32", 0b10, 0b0>;
defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_multi<"u32.f32", 0b10, 0b1>;
-class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
+class MVE_VCVT_fp_int<string suffix, bits<2> size, bit toint, bit unsigned,
list<dag> pattern=[]>
: MVE_float<"vcvt", suffix, (outs MQPR:$Qd),
(ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
let Inst{17-16} = 0b11;
let Inst{15-13} = Qd{2-0};
let Inst{12-9} = 0b0011;
- let Inst{8-7} = op;
+ let Inst{8} = toint;
+ let Inst{7} = unsigned;
let Inst{4} = 0b0;
let validForTailPredication = 1;
}
+multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
+ SDNode unpred_op> {
+ defvar Unsigned = !or(!eq(Dest.SuffixLetter,"u"), !eq(Src.SuffixLetter,"u"));
+ defvar ToInt = !eq(Src.SuffixLetter,"f");
+
+ def "" : MVE_VCVT_fp_int<Dest.Suffix # "." # Src.Suffix, Dest.Size,
+ ToInt, Unsigned>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(Dest.Vec (unpred_op (Src.Vec MQPR:$src))),
+ (Dest.Vec (Inst (Src.Vec MQPR:$src)))>;
+ def : Pat<(Dest.Vec (int_arm_mve_vcvt_fp_int_predicated
+ (Src.Vec MQPR:$src), (i32 Unsigned),
+ (Src.Pred VCCR:$mask), (Dest.Vec MQPR:$inactive))),
+ (Dest.Vec (Inst (Src.Vec MQPR:$src), ARMVCCThen,
+ (Src.Pred VCCR:$mask),
+ (Dest.Vec MQPR:$inactive)))>;
+ }
+}
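+
+// For instance, MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint> derives
+// Unsigned = 1 and ToInt = 1 from the suffix letters, giving the
+// "vcvt.u32.f32" encoding used by MVE_VCVTu32f32z below.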
// The unsuffixed VCVT for float->int implicitly rounds toward zero,
// which I reflect here in the llvm instruction names
-def MVE_VCVTs16f16z : MVE_VCVT_fp_int<"s16.f16", 0b01, 0b10>;
-def MVE_VCVTu16f16z : MVE_VCVT_fp_int<"u16.f16", 0b01, 0b11>;
-def MVE_VCVTs32f32z : MVE_VCVT_fp_int<"s32.f32", 0b10, 0b10>;
-def MVE_VCVTu32f32z : MVE_VCVT_fp_int<"u32.f32", 0b10, 0b11>;
+defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m<MVE_v8s16, MVE_v8f16, fp_to_sint>;
+defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m<MVE_v8u16, MVE_v8f16, fp_to_uint>;
+defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m<MVE_v4s32, MVE_v4f32, fp_to_sint>;
+defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint>;
// Whereas VCVT for int->float rounds to nearest
-def MVE_VCVTf16s16n : MVE_VCVT_fp_int<"f16.s16", 0b01, 0b00>;
-def MVE_VCVTf16u16n : MVE_VCVT_fp_int<"f16.u16", 0b01, 0b01>;
-def MVE_VCVTf32s32n : MVE_VCVT_fp_int<"f32.s32", 0b10, 0b00>;
-def MVE_VCVTf32u32n : MVE_VCVT_fp_int<"f32.u32", 0b10, 0b01>;
-
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))),
- (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>;
- def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))),
- (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>;
- def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))),
- (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>;
- def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))),
- (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>;
- def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))),
- (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>;
- def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))),
- (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>;
- def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))),
- (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>;
- def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))),
- (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>;
-}
+defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8s16, sint_to_fp>;
+defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp>;
+defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp>;
+defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp>;
class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
list<dag> pattern=[]>
let validForTailPredication = 1;
}
-def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>;
-def MVE_VABSf32 : MVE_VABSNEG_fp<"vabs", "f32", 0b10, 0b0>;
-
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v8f16 (fabs MQPR:$src)),
- (MVE_VABSf16 MQPR:$src)>;
- def : Pat<(v4f32 (fabs MQPR:$src)),
- (MVE_VABSf32 MQPR:$src)>;
-}
+multiclass MVE_VABSNEG_fp_m<string iname, SDNode unpred_op, Intrinsic pred_int,
+ MVEVectorVTInfo VTI, bit opcode> {
+ def "" : MVE_VABSNEG_fp<iname, VTI.Suffix, VTI.Size, opcode>;
+ defvar Inst = !cast<Instruction>(NAME);
-def MVE_VNEGf16 : MVE_VABSNEG_fp<"vneg", "f16", 0b01, 0b1>;
-def MVE_VNEGf32 : MVE_VABSNEG_fp<"vneg", "f32", 0b10, 0b1>;
+  let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v8f16 (fneg MQPR:$src)),
- (MVE_VNEGf16 MQPR:$src)>;
- def : Pat<(v4f32 (fneg MQPR:$src)),
- (MVE_VNEGf32 MQPR:$src)>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+ }
}
+defm MVE_VABSf16 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated,
+ MVE_v8f16, 0>;
+defm MVE_VABSf32 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated,
+ MVE_v4f32, 0>;
+defm MVE_VNEGf16 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
+ MVE_v8f16, 1>;
+defm MVE_VNEGf32 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
+ MVE_v4f32, 1>;
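+// The trailing bit argument ('opcode') feeds the class's 'negate' field:
+// 0 selects the VABS encoding, 1 the VNEG encoding.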
+
class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
list<dag> pattern=[]>
: MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>;
def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>;
-let Predicates = [HasMVEInt] in {
- def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
- (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
- def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
- (v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
- def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))),
- (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
- def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))),
- (v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
-
- def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qm),
- (v8i16 (ARMvrev32 MQPR:$Qd_src)), (i32 1))),
- (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
- def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qm),
- (v16i8 (ARMvrev16 MQPR:$Qd_src)), (i32 1))),
- (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
-}
+
+multiclass MVE_VMOVN_p<Instruction Inst, bit top,
+ MVEVectorVTInfo VTI, MVEVectorVTInfo InVTI> {
+ // Match the most obvious MVEvmovn(a,b,t), which overwrites the odd or even
+ // lanes of a (depending on t) with the even lanes of b.
+ def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qd_src),
+ (VTI.Vec MQPR:$Qm), (i32 top))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
+
+ if !eq(top, 0) then {
+ // If we see MVEvmovn(a,ARMvrev(b),1), that wants to overwrite the odd
+ // lanes of a with the odd lanes of b. In other words, the lanes we're
+ // _keeping_ from a are the even ones. So we can flip it round and say that
+ // this is the same as overwriting the even lanes of b with the even lanes
+ // of a, i.e. it's a VMOVNB with the operands reversed.
+ defvar vrev = !cast<SDNode>("ARMvrev" # InVTI.LaneBits);
+ def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (vrev MQPR:$Qd_src)), (i32 1))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
+ }
+
+ // Match the IR intrinsic for a predicated VMOVN. This regards the Qm input
+ // as having wider lanes that we're narrowing, instead of already-narrow
+ // lanes that we're taking every other one of.
+ def : Pat<(VTI.Vec (int_arm_mve_vmovn_predicated (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm), (i32 top),
+ (InVTI.Pred VCCR:$pred))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm),
+ ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
+}
+
+defm : MVE_VMOVN_p<MVE_VMOVNi32bh, 0, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VMOVN_p<MVE_VMOVNi32th, 1, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VMOVN_p<MVE_VMOVNi16bh, 0, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VMOVN_p<MVE_VMOVNi16th, 1, MVE_v16i8, MVE_v8i16>;
class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmvnq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmvnt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmvnt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmvnt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmvnq_m_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmvnt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmvnt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmvnt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vnegq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vnegt.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vnegq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vnegt.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vnegq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vnegt.s8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vnegq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vnegt.s16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vnegq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vnegt.s32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vabsq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vabst.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vabsq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vabst.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vabsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vabst.s8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vabsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vabst.s16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vabsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vabst.s32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqnegq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqnegq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqnegt.s8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqnegq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqnegq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqnegt.s16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqnegq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqnegq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqnegt.s32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqabsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqabsq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqabst.s8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqabsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqabsq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqabst.s16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqabsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqabsq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqabst.s32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+
+declare <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vclsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclsq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclst.s8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vclsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclsq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclst.s16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vclsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclsq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclst.s32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vclzq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclzt.i8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vclzq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclzt.i16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vclzq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclzt.i32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vclzq_m_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclzt.i8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vclzq_m_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclzt.i16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vclzq_m_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclzt.i32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_f16_s16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_f16_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.f16.s16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> %a, i32 0, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_f16_u16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_f16_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.f16.u16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> %a, i32 1, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_f32_s32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_f32_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.f32.s32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_f32_u32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_f32_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.f32.u32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> %a, i32 1, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_s16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_s16_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.s16.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> %a, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_s32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_s32_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.s32.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> %a, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_u16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_u16_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.u16.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> %a, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_u32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_u32_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.u32.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> %a, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+
+declare <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x float>)
+declare <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half>, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float>, i32, <4 x i1>, <4 x i32>)
%1 = zext <4 x i16> %0 to <4 x i32>
ret <4 x i32> %1
}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovlbq_m_s8:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovlbt.s8 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovlbq_m_s8:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q0
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovlbt.s8 q2, q0
+; BE-NEXT: vrev64.16 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovlbq_m_s16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovlbt.s16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovlbq_m_s16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q0
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovlbt.s16 q2, q0
+; BE-NEXT: vrev64.32 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovlbq_m_u8:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovlbt.u8 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovlbq_m_u8:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q0
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovlbt.u8 q2, q0
+; BE-NEXT: vrev64.16 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovlbq_m_u16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovlbt.u16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovlbq_m_u16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q0
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovlbt.u16 q2, q0
+; BE-NEXT: vrev64.32 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovltq_m_s8:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovltt.s8 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovltq_m_s8:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q0
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovltt.s8 q2, q0
+; BE-NEXT: vrev64.16 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovltq_m_s16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovltt.s16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovltq_m_s16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q0
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovltt.s16 q2, q0
+; BE-NEXT: vrev64.32 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovltq_m_u8:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovltt.u8 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovltq_m_u8:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q0
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovltt.u8 q2, q0
+; BE-NEXT: vrev64.16 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovltq_m_u16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovltt.u16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovltq_m_u16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q0
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovltt.u16 q2, q0
+; BE-NEXT: vrev64.32 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8>, i32, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16>, i32, i32, <4 x i1>, <4 x i32>)
ret <8 x i16> %2
}
+define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovnbq_m_s16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovnbt.i16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovnbq_m_s16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q1
+; BE-NEXT: vrev64.8 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovnbt.i16 q1, q2
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, <8 x i1> %1)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovnbq_m_s32:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovnbt.i32 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovnbq_m_s32:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q1
+; BE-NEXT: vrev64.16 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovnbt.i32 q1, q2
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovnbq_m_u16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovnbt.i16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovnbq_m_u16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q1
+; BE-NEXT: vrev64.8 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovnbt.i16 q1, q2
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, <8 x i1> %1)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovnbq_m_u32:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovnbt.i32 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovnbq_m_u32:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q1
+; BE-NEXT: vrev64.16 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovnbt.i32 q1, q2
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovntq_m_s16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovntt.i16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovntq_m_s16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q1
+; BE-NEXT: vrev64.8 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovntt.i16 q1, q2
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, <8 x i1> %1)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovntq_m_s32:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovntt.i32 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovntq_m_s32:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q1
+; BE-NEXT: vrev64.16 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovntt.i32 q1, q2
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, <4 x i1> %1)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovntq_m_u16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovntt.i16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovntq_m_u16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q1
+; BE-NEXT: vrev64.8 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovntt.i16 q1, q2
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, <8 x i1> %1)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovntq_m_u32:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovntt.i32 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovntq_m_u32:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q1
+; BE-NEXT: vrev64.16 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovntt.i32 q1, q2
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, <4 x i1> %1)
+ ret <8 x i16> %2
+}
+
declare <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8>)
declare <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16>)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8>, <8 x i16>, i32, <8 x i1>)
+declare <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16>, <4 x i32>, i32, <4 x i1>)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrev16q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev16q_m_i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev16t.8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 16, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrev32q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev32q_m_i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev32t.8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 32, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrev32q_m_i16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev32q_m_i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev32t.16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> %a, i32 32, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrev32q_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev32q_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev32t.16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> %a, i32 32, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrev64q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev64t.8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 64, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrev64q_m_i16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev64t.16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> %a, i32 64, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrev64q_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev64t.16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> %a, i32 64, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrev64q_m_i32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev64t.32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> %a, i32 64, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrev64q_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev64t.32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> %a, i32 64, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+
+declare <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
+declare <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half>, i32, <8 x i1>, <8 x half>)
+declare <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)
+declare <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float>, i32, <4 x i1>, <4 x float>)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndaq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndaq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintat.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndaq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndaq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintat.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndmq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndmq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintmt.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndmq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndmq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintmt.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndnq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndnq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintnt.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndnq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndnq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintnt.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndpq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndpq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintpt.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndpq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndpq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintpt.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintzt.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintzt.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndxq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndxq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintxt.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndxq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndxq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintxt.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)