let params = T.Int in {
def vmvnq: Intrinsic<Vector, (args Vector:$a),
(xor $a, (uint_max Vector))>;
+ defm vmvnq: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<"mvn_predicated", [Vector, Predicate]> $a, $pred, $inactive)>;
def vclzq: Intrinsic<Vector, (args Vector:$a),
(IRIntBase<"ctlz", [Vector]> $a, (i1 0))>;
+ defm vclzq: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<"clz_predicated", [Vector, Predicate]> $a, $pred, $inactive)>;
}
let params = T.Signed in {
def vclsq: Intrinsic<Vector, (args Vector:$a), (IRInt<"vcls", [Vector]> $a)>;
+ defm vclsq: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<"cls_predicated", [Vector, Predicate]> $a, $pred, $inactive)>;
+
def vnegq: Intrinsic<Vector, (args Vector:$a),
(sub (zeroinit Vector), $a)>;
def vabsq: Intrinsic<Vector, (args Vector:$a),
    (select (icmp_sgt $a, (zeroinit Vector)), $a,
            (sub (zeroinit Vector), $a))>;
def vqnegq: Intrinsic<Vector, (args Vector:$a),
    (select (icmp_eq $a, (int_min Vector)),
            (int_max Vector),
            (sub (zeroinit Vector), $a))>;
+
+ foreach name = ["qneg", "qabs"] in {
+ defm v#name#q: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<name#"_predicated", [Vector, Predicate]> $a, $pred, $inactive),
+ 0 /* no _x variant for saturating intrinsics */>;
+ }
+}
+let params = !listconcat(T.Signed, T.Float) in {
+ foreach name = ["neg", "abs"] in {
+ defm v#name#q: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<name#"_predicated", [Vector, Predicate]> $a, $pred, $inactive)>;
+ }
}
let params = T.Float in {
def vnegq_f: Intrinsic<Vector, (args Vector:$a), (fneg $a)>,
             NameOverride<"vnegq">;
defvar FVector = VecOf<FScalar>;
defvar IVector = VecOf<IScalar>;
- let params = [IScalar], pnt = PNT_2Type in
- def : Intrinsic<FVector, (args IVector:$a), (itof $a, FVector)>,
- NameOverride<"vcvtq_" # FScalar>;
- let params = [FScalar], pnt = PNT_None in
- def : Intrinsic<IVector, (args FVector:$a), (ftoi $a, IVector)>,
- NameOverride<"vcvtq_" # IScalar>;
+ let params = [IScalar] in {
+ let pnt = PNT_2Type in {
+ def : Intrinsic<FVector, (args IVector:$a), (itof $a, FVector)>,
+ NameOverride<"vcvtq_" # FScalar>;
+ }
+ defm vcvtq: IntrinsicMX<FVector, (args IVector:$a, Predicate:$pred),
+ (IRInt<"vcvt_fp_int_predicated", [FVector, IVector, Predicate]>
+ $a, (unsignedflag IScalar), $pred, $inactive),
+ 1, "_" # FScalar, PNT_2Type, PNT_2Type>;
+ }
+ let params = [FScalar] in {
+ let pnt = PNT_None in {
+ def : Intrinsic<IVector, (args FVector:$a), (ftoi $a, IVector)>,
+ NameOverride<"vcvtq_" # IScalar>;
+ }
+ defm vcvtq: IntrinsicMX<IVector, (args FVector:$a, Predicate:$pred),
+ (IRInt<"vcvt_fp_int_predicated", [IVector, FVector, Predicate]>
+ $a, (unsignedflag IScalar), $pred, $inactive),
+ 1, "_" # IScalar, PNT_2Type, PNT_None>;
+ }
}
-defm : float_int_conversions<f32, u32, fptoui, uitofp>;
-defm : float_int_conversions<f16, u16, fptoui, uitofp>;
-defm : float_int_conversions<f32, s32, fptosi, sitofp>;
-defm : float_int_conversions<f16, s16, fptosi, sitofp>;
+defm "" : float_int_conversions<f32, u32, fptoui, uitofp>;
+defm "" : float_int_conversions<f16, u16, fptoui, uitofp>;
+defm "" : float_int_conversions<f32, s32, fptosi, sitofp>;
+defm "" : float_int_conversions<f16, s16, fptosi, sitofp>;
-let params = [s8, u8, s16, u16] in {
- def vmovlbq: Intrinsic<DblVector, (args Vector:$a),
- (extend (unzip $a, 0), DblVector, (unsignedflag Scalar))>;
- def vmovltq: Intrinsic<DblVector, (args Vector:$a),
- (extend (unzip $a, 1), DblVector, (unsignedflag Scalar))>;
+multiclass vmovl<bit top> {
+ let params = [s8, u8, s16, u16] in {
+ def "": Intrinsic<DblVector, (args Vector:$a),
+ (extend (unzip $a, top), DblVector, (unsignedflag Scalar))>;
+ defm "": IntrinsicMX<DblVector, (args Vector:$a, DblPredicate:$pred),
+ (IRInt<"vmovl_predicated", [DblVector, Vector, DblPredicate]>
+ $a, (unsignedflag Scalar), top, $pred, $inactive)>;
+ }
}
-let params = [s16, u16, s32, u32] in {
- def vmovnbq: Intrinsic<HalfVector, (args HalfVector:$inactive, Vector:$a),
- (trunc (zip $a, (vreinterpret (vrev $inactive, (bitsize Scalar)), Vector)),
- HalfVector)>;
- def vmovntq: Intrinsic<HalfVector, (args HalfVector:$inactive, Vector:$a),
- (trunc (zip (vreinterpret $inactive, Vector), $a), HalfVector)>;
+defm vmovlbq: vmovl<0>;
+defm vmovltq: vmovl<1>;
+
+multiclass vmovn<bit top, dag wide_result> {
+ let params = [s16, u16, s32, u32] in {
+ def "": Intrinsic<HalfVector, (args HalfVector:$inactive, Vector:$a),
+ (trunc wide_result, HalfVector)>;
+ def _m: Intrinsic<HalfVector, (args HalfVector:$inactive, Vector:$a,
+ Predicate:$pred),
+ (IRInt<"vmovn_predicated", [HalfVector, Vector, Predicate]>
+ $inactive, $a, top, $pred)>;
+ }
}
-let params = T.Float in {
- def vrndq: Intrinsic<Vector, (args Vector:$a),
- (IRIntBase<"trunc", [Vector]> $a)>;
- def vrndmq: Intrinsic<Vector, (args Vector:$a),
- (IRIntBase<"floor", [Vector]> $a)>;
- def vrndpq: Intrinsic<Vector, (args Vector:$a),
- (IRIntBase<"ceil", [Vector]> $a)>;
- def vrndaq: Intrinsic<Vector, (args Vector:$a),
- (IRIntBase<"round", [Vector]> $a)>;
- def vrndxq: Intrinsic<Vector, (args Vector:$a),
- (IRIntBase<"rint", [Vector]> $a)>;
- def vrndnq: Intrinsic<Vector, (args Vector:$a),
- (IRInt<"vrintn", [Vector]> $a)>;
+defm vmovntq: vmovn<1, (zip (vreinterpret $inactive, Vector), $a)>;
+defm vmovnbq: vmovn<0,
+ (zip $a, (vreinterpret (vrev $inactive, (bitsize Scalar)), Vector))>;
+
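For reference, here is a hand-written sketch of how a predicated narrowing move should lower, derived from the vmovn multiclass above. Because the unpredicated vmovnbq/vmovntq already take an $inactive argument, only a _m form exists (no _x variant). The test function name is hypothetical and the exact temporaries may differ from generated output:

// CHECK-LABEL: @test_vmovnbq_m_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
//
int8x16_t test_vmovnbq_m_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
{
    return vmovnbq_m_s16(a, b, p);
}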
+multiclass vrnd<IRIntBase ir_int, string suffix> {
+ let params = T.Float in {
+ def "": Intrinsic<Vector, (args Vector:$a), (ir_int $a)>;
+ defm "": IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<"vrint"#suffix#"_predicated", [Vector, Predicate]>
+ $a, $pred, $inactive)>;
+ }
}
+defm vrndq: vrnd<IRIntBase<"trunc", [Vector]>, "z">;
+defm vrndmq: vrnd<IRIntBase<"floor", [Vector]>, "m">;
+defm vrndpq: vrnd<IRIntBase<"ceil", [Vector]>, "p">;
+defm vrndaq: vrnd<IRIntBase<"round", [Vector]>, "a">;
+defm vrndxq: vrnd<IRIntBase<"rint", [Vector]>, "x">;
+defm vrndnq: vrnd<IRInt<"vrintn", [Vector]>, "n">;
+
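As a sanity check on the vrnd multiclass, a hand-written sketch of the expected lowering for vrndmq_m_f32 follows; the intrinsic name is derived from IRInt<"vrint"#suffix#"_predicated"> with suffix "m", and the test function itself is hypothetical, not generated output:

// CHECK-LABEL: @test_vrndmq_m_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
// CHECK-NEXT:    ret <4 x float> [[TMP2]]
//
float32x4_t test_vrndmq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
{
    return vrndmq_m_f32(inactive, a, p);
}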
multiclass compare_with_pred<string condname, dag arguments,
dag cmp, string suffix> {
// Make the predicated and unpredicated versions of a single comparison.
defm vrmlsldavh : MVEBinaryVectorHoriz64R<V.True, V.True, "x">;
}
-let params = T.All8 in
-def vrev16q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 16)>;
-let params = !listconcat(T.All8, T.All16) in
-def vrev32q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 32)>;
-let params = T.Usual in
-def vrev64q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 64)>;
+multiclass vrev_predicated<int revsize> {
+ defm "" : IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+ (IRInt<"vrev_predicated", [Vector, Predicate]>
+ $a, revsize, $pred, $inactive)>;
+}
+
+let params = T.All8 in {
+ def vrev16q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 16)>;
+ defm vrev16q: vrev_predicated<16>;
+}
+let params = !listconcat(T.All8, T.All16) in {
+ def vrev32q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 32)>;
+ defm vrev32q: vrev_predicated<32>;
+}
+let params = T.Usual in {
+ def vrev64q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 64)>;
+ defm vrev64q: vrev_predicated<64>;
+}
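A matching sketch for the predicated vrev forms, derived from vrev_predicated above with revsize folded in as an immediate (hypothetical test function, written in the same style as the generated tests):

// CHECK-LABEL: @test_vrev16q_m_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
//
int8x16_t test_vrev16q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
{
    return vrev16q_m_s8(inactive, a, p);
}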
foreach desttype = T.All in {
// We want a vreinterpretq between every pair of supported vector types
  // The _m variant takes an initial parameter called $inactive, which
  // provides the input value of the output register, i.e. all the
  // inactive lanes in the predicated operation take their values from
  // this.
- def "_m" # nameSuffix:
- Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg> {
+ def : Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg>,
+ NameOverride<NAME # "_m" # nameSuffix> {
let pnt = pnt_m;
}
// The _x variant leaves off that parameter, and simply uses an
// undef value of the same type.
- def "_x" # nameSuffix:
- Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)> {
+ def : Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)>,
+ NameOverride<NAME # "_x" # nameSuffix> {
let pnt = pnt_x;
}
}
#endif /* POLYMORPHIC */
}
+// CHECK-LABEL: @test_vmvnq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vmvnq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmvnq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmvnq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmvnq_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmvnq_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmvnq_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_m_u32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vmvnq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmvnq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmvnq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmvnq_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmvnq_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmvnq_x_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmvnq_x_u32(a, p);
+#endif /* POLYMORPHIC */
+}
+
// CHECK-LABEL: @test_vnegq_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = fneg <8 x half> [[A:%.*]]
return vqnegq_s32(a);
#endif /* POLYMORPHIC */
}
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vnegq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vnegq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vnegq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vnegq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vnegq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vnegq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vnegq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vnegq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vnegq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vnegq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vnegq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vnegq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+ return vnegq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vnegq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+ return vnegq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vnegq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+ return vnegq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vnegq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+ return vnegq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vnegq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+ return vnegq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vabsq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vabsq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vabsq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vabsq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vabsq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vabsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vabsq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vabsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vabsq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vabsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vabsq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vabsq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vabsq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vabsq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vabsq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vabsq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vabsq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vabsq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vabsq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vabsq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vabsq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vqnegq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqnegq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vqnegq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqnegq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqnegq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vqnegq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqnegq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vqnegq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vqnegq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vqabsq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqabsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vqabsq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqabsq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqabsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vqabsq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqabsq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vqabsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vqabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vqabsq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
#endif /* POLYMORPHIC */
}
+// CHECK-LABEL: @test_vclsq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vclsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclsq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vclsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclsq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vclsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclsq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vclzq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclzq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vclzq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclzq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vclzq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclzq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vclzq_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclzq_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vclzq_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclzq_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vclzq_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vclzq_m_u32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vclsq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclsq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vclsq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclsq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_x_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vclsq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclsq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclsq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vclzq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclzq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vclzq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclzq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vclzq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclzq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vclzq_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclzq_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vclzq_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclzq_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vclzq_x_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+ return vclzq_x_u32(a, p);
+#endif /* POLYMORPHIC */
+}
return vcvtq_u32_f32(a);
}
+// CHECK-LABEL: @test_vcvtq_m_f16_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvtq_m_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_f16_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_f16_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvtq_m_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_f16_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_f32_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtq_m_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_f32_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_f32_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtq_m_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_f32_u32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_s16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vcvtq_m_s16_f16(int16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_s16_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_s32_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vcvtq_m_s32_f32(int32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_s32_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_u16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vcvtq_m_u16_f16(uint16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_u16_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_u32_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vcvtq_m_u32_f32(uint32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_m_u32_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_f16_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvtq_x_f16_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_x(a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_x_f16_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_f16_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvtq_x_f16_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_x(a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_x_f16_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_f32_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtq_x_f32_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_x(a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_x_f32_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_f32_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtq_x_f32_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vcvtq_x(a, p);
+#else /* POLYMORPHIC */
+ return vcvtq_x_f32_u32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_s16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vcvtq_x_s16_f16(float16x8_t a, mve_pred16_t p)
+{
+ return vcvtq_x_s16_f16(a, p);
+}
+
+// CHECK-LABEL: @test_vcvtq_x_s32_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vcvtq_x_s32_f32(float32x4_t a, mve_pred16_t p)
+{
+ return vcvtq_x_s32_f32(a, p);
+}
+
+// CHECK-LABEL: @test_vcvtq_x_u16_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vcvtq_x_u16_f16(float16x8_t a, mve_pred16_t p)
+{
+ return vcvtq_x_u16_f16(a, p);
+}
+
+// CHECK-LABEL: @test_vcvtq_x_u32_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vcvtq_x_u32_f32(float32x4_t a, mve_pred16_t p)
+{
+ return vcvtq_x_u32_f32(a, p);
+}
+
// CHECK-LABEL: @test_vcvttq_f16_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1)
#endif /* POLYMORPHIC */
}
+// CHECK-LABEL: @test_vmovlbq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovlbq_m_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmovlbq_m_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovlbq_m_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmovlbq_m_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovltq_m_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmovltq_m_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovltq_m_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmovltq_m_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
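+// The _x ("don't care") variants pass undef as the inactive operand, leaving
+// the false-predicated lanes of the result unspecified.
+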
+// CHECK-LABEL: @test_vmovlbq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovlbq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmovlbq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovlbq_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_x_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmovlbq_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovlbq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovlbq_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovltq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmovltq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovltq_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_x_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmovltq_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovltq_x(a, p);
+#else /* POLYMORPHIC */
+ return vmovltq_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
return vmovntq_u32(a, b);
#endif /* POLYMORPHIC */
}
+
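+// The predicated vmovn intrinsics below generate identical IR regardless of
+// endianness, so the LE and BE check lines match; the separate prefixes are
+// presumably for other tests in this file whose unpredicated lowering differs
+// by endianness.
+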
+// LE-LABEL: @test_vmovnbq_m_s16(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
+// LE-NEXT: ret <16 x i8> [[TMP2]]
+//
+// BE-LABEL: @test_vmovnbq_m_s16(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
+// BE-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vmovnbq_m_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovnbq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovnbq_m_s16(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovnbq_m_s32(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// LE-NEXT: ret <8 x i16> [[TMP2]]
+//
+// BE-LABEL: @test_vmovnbq_m_s32(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// BE-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovnbq_m_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovnbq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovnbq_m_s32(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovnbq_m_u16(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
+// LE-NEXT: ret <16 x i8> [[TMP2]]
+//
+// BE-LABEL: @test_vmovnbq_m_u16(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
+// BE-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmovnbq_m_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovnbq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovnbq_m_u16(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovnbq_m_u32(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// LE-NEXT: ret <8 x i16> [[TMP2]]
+//
+// BE-LABEL: @test_vmovnbq_m_u32(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// BE-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovnbq_m_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovnbq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovnbq_m_u32(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovntq_m_s16(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// LE-NEXT: ret <16 x i8> [[TMP2]]
+//
+// BE-LABEL: @test_vmovntq_m_s16(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// BE-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vmovntq_m_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovntq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovntq_m_s16(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovntq_m_s32(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// LE-NEXT: ret <8 x i16> [[TMP2]]
+//
+// BE-LABEL: @test_vmovntq_m_s32(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// BE-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovntq_m_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovntq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovntq_m_s32(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovntq_m_u16(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// LE-NEXT: ret <16 x i8> [[TMP2]]
+//
+// BE-LABEL: @test_vmovntq_m_u16(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// BE-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmovntq_m_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovntq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovntq_m_u16(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovntq_m_u32(
+// LE-NEXT: entry:
+// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// LE-NEXT: ret <8 x i16> [[TMP2]]
+//
+// BE-LABEL: @test_vmovntq_m_u32(
+// BE-NEXT: entry:
+// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// BE-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovntq_m_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vmovntq_m(a, b, p);
+#else /* POLYMORPHIC */
+ return vmovntq_m_u32(a, b, p);
+#endif /* POLYMORPHIC */
+}
return vrev64q_u32(a);
#endif /* POLYMORPHIC */
}
+
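+// The constant i32 operand of vrev.predicated (16, 32 or 64 below) gives the
+// width in bits of the element groups being reversed.
+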
+// CHECK-LABEL: @test_vrev16q_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev16q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev16q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev16q_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev16q_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev16q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev16q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev16q_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrev32q_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev32q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrev32q_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev32q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrev32q_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrev64q_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrev64q_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev64q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrev64q_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vrev64q_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev64q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrev64q_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vrev64q_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_m_u32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev16q_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev16q_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev16q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev16q_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev16q_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev16q_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev16q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev16q_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrev32q_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev32q_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrev32q_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev32q_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrev32q_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev32q_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrev64q_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrev64q_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev64q_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrev64q_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vrev64q_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT: ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev64q_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrev64q_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vrev64q_x_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+ return vrev64q_x_u32(a, p);
+#endif /* POLYMORPHIC */
+}
return vrndnq_f32(a);
#endif /* POLYMORPHIC */
}
+
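+// The vrnd* builtins map onto the vrint* predicated IR intrinsics: the name
+// suffix selects the rounding mode (a/m/n/p/x), and plain vrndq rounds
+// toward zero via vrintz.
+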
+// CHECK-LABEL: @test_vrndaq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndaq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndaq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndaq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndaq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndaq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndaq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndaq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndmq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndmq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndmq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndmq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndmq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndmq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndnq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndnq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndnq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndnq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndpq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndpq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndpq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndpq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndpq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndpq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_m_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndxq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndxq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndxq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_m_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndxq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndxq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+ return vrndxq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndaq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndaq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndaq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndaq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndaq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndaq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndaq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndaq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndmq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndmq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndmq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndmq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndmq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndmq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndnq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndnq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndnq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndnq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndnq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndpq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndpq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndpq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndpq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndpq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndpq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_x_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT: ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndxq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndxq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndxq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_x_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndxq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+ return vrndxq_x(a, p);
+#else /* POLYMORPHIC */
+ return vrndxq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
[llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */],
LLVMMatchType<0>, llvm_anyvector_ty>;
+def int_arm_mve_vcvt_fp_int_predicated: Intrinsic<
+ [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */,
+ llvm_anyvector_ty /* predicate */, LLVMMatchType<0> /* inactive */],
+ [IntrNoMem]>;
+
def int_arm_mve_vrintn: Intrinsic<
[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
def int_arm_mve_vcls: Intrinsic<
LLVMMatchType<0>],
[IntrNoMem]>;
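+
+// Predicated forms of the simple unary vector operations: the operands are
+// the input vector, the predicate, and the 'inactive' vector that supplies
+// the result lanes where the predicate is false.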
+class MVESimpleUnaryPredicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_arm_mve_mvn_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_abs_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_neg_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_qabs_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_qneg_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_clz_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_cls_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintz_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintm_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintp_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrinta_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintx_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintn_predicated: MVESimpleUnaryPredicated;
+
+def int_arm_mve_vrev_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_i32_ty /* size to reverse */,
+ llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_arm_mve_vmovl_predicated: Intrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, llvm_i32_ty /* unsigned */, llvm_i32_ty /* top half */,
+ llvm_anyvector_ty /* predicate */, LLVMMatchType<0>], [IntrNoMem]>;
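+// vmovn writes only alternate lanes of its output, so rather than taking a
+// separate 'inactive' operand it takes the previous value of the whole
+// output vector as its first argument.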
+def int_arm_mve_vmovn_predicated: Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i32_ty /* top half */,
+ llvm_anyvector_ty /* predicate */], [IntrNoMem]>;
+
} // end TargetPrefix
(v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>;
}
-let Predicates = [HasMVEInt] in {
- def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
- (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
- def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
- (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
- def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
- (v16i8 (MVE_VREV64_8 (v16i8 MQPR:$src)))>;
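+// For each (reversal width, vector type) pair, emit two selection patterns:
+// one for the plain ARMvrev* node, and one selecting the predicated
+// intrinsic into the same instruction with an ARMVCCThen predicate and an
+// inactive value.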
+multiclass MVE_VREV_basic_patterns<int revbits, list<MVEVectorVTInfo> VTIs,
+ Instruction Inst> {
+ defvar unpred_op = !cast<SDNode>("ARMvrev" # revbits);
+
+ foreach VTI = VTIs in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$src)))>;
+ def : Pat<(VTI.Vec (int_arm_mve_vrev_predicated (VTI.Vec MQPR:$src),
+ revbits, (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$src), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
+}
- def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
- (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
- def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
- (v16i8 (MVE_VREV32_8 (v16i8 MQPR:$src)))>;
+let Predicates = [HasMVEInt] in {
+ defm: MVE_VREV_basic_patterns<64, [MVE_v4i32, MVE_v4f32], MVE_VREV64_32>;
+ defm: MVE_VREV_basic_patterns<64, [MVE_v8i16, MVE_v8f16], MVE_VREV64_16>;
+ defm: MVE_VREV_basic_patterns<64, [MVE_v16i8 ], MVE_VREV64_8>;
- def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
- (v16i8 (MVE_VREV16_8 (v16i8 MQPR:$src)))>;
+ defm: MVE_VREV_basic_patterns<32, [MVE_v8i16, MVE_v8f16], MVE_VREV32_16>;
+ defm: MVE_VREV_basic_patterns<32, [MVE_v16i8 ], MVE_VREV32_8>;
- def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))),
- (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
- def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
- (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
- def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
- (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
+ defm: MVE_VREV_basic_patterns<16, [MVE_v16i8 ], MVE_VREV16_8>;
}
def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
}
let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (vnotq (v16i8 MQPR:$val1))),
- (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>;
- def : Pat<(v8i16 (vnotq (v8i16 MQPR:$val1))),
- (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
- def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))),
- (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
- def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))),
- (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
+ foreach VTI = [ MVE_v16i8, MVE_v8i16, MVE_v4i32, MVE_v2i64 ] in {
+ def : Pat<(VTI.Vec (vnotq (VTI.Vec MQPR:$val1))),
+ (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1)))>;
+ def : Pat<(VTI.Vec (int_arm_mve_mvn_predicated (VTI.Vec MQPR:$val1),
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
}
class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
let validForTailPredication = 1;
}
-def MVE_VCLSs8 : MVE_VCLSCLZ<"vcls", "s8", 0b00, 0b0>;
-def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>;
-def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>;
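+// Define the instruction together with both of its selection patterns: one
+// for the unpredicated operation (ctlz for VCLZ, the vcls intrinsic for
+// VCLS) and one for the predicated intrinsic.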
+multiclass MVE_VCLSCLZ_p<string opname, bit opcode, MVEVectorVTInfo VTI,
+ SDNode unpred_op> {
+ def "": MVE_VCLSCLZ<"v"#opname, VTI.Suffix, VTI.Size, opcode>;
-def MVE_VCLZs8 : MVE_VCLSCLZ<"vclz", "i8", 0b00, 0b1>;
-def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
-def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_"#opname#"_predicated");
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 ( ctlz (v16i8 MQPR:$val1))),
- (v16i8 ( MVE_VCLZs8 (v16i8 MQPR:$val1)))>;
- def : Pat<(v4i32 ( ctlz (v4i32 MQPR:$val1))),
- (v4i32 ( MVE_VCLZs32 (v4i32 MQPR:$val1)))>;
- def : Pat<(v8i16 ( ctlz (v8i16 MQPR:$val1))),
- (v8i16 ( MVE_VCLZs16 (v8i16 MQPR:$val1)))>;
-
- def : Pat<(v16i8 ( int_arm_mve_vcls (v16i8 MQPR:$val1))),
- (v16i8 ( MVE_VCLSs8 (v16i8 MQPR:$val1)))>;
- def : Pat<(v4i32 ( int_arm_mve_vcls (v4i32 MQPR:$val1))),
- (v4i32 ( MVE_VCLSs32 (v4i32 MQPR:$val1)))>;
- def : Pat<(v8i16 ( int_arm_mve_vcls (v8i16 MQPR:$val1))),
- (v8i16 ( MVE_VCLSs16 (v8i16 MQPR:$val1)))>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
}
+defm MVE_VCLSs8 : MVE_VCLSCLZ_p<"cls", 0, MVE_v16s8, int_arm_mve_vcls>;
+defm MVE_VCLSs16 : MVE_VCLSCLZ_p<"cls", 0, MVE_v8s16, int_arm_mve_vcls>;
+defm MVE_VCLSs32 : MVE_VCLSCLZ_p<"cls", 0, MVE_v4s32, int_arm_mve_vcls>;
+
+defm MVE_VCLZs8 : MVE_VCLSCLZ_p<"clz", 1, MVE_v16i8, ctlz>;
+defm MVE_VCLZs16 : MVE_VCLSCLZ_p<"clz", 1, MVE_v8i16, ctlz>;
+defm MVE_VCLZs32 : MVE_VCLSCLZ_p<"clz", 1, MVE_v4i32, ctlz>;
+
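+// Shared encoding class for VABS/VNEG and the saturating VQABS/VQNEG: bits
+// 16 and 10 are where the two encodings differ, so a single class with a
+// 'saturate' flag covers all four instructions.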
class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
- list<dag> pattern=[]>
+ bit saturate, list<dag> pattern=[]>
: MVEIntSingleSrc<iname, suffix, size, pattern> {
let Inst{28} = 0b1;
let Inst{25-23} = 0b111;
let Inst{21-20} = 0b11;
- let Inst{17-16} = 0b01;
- let Inst{12-8} = 0b00011;
+ let Inst{17} = 0b0;
+ let Inst{16} = !eq(saturate, 0);
+ let Inst{12-11} = 0b00;
+ let Inst{10} = saturate;
+ let Inst{9-8} = 0b11;
let Inst{7} = negate;
let Inst{6} = 0b1;
let Inst{4} = 0b0;
let validForTailPredication = 1;
}
-def MVE_VABSs8 : MVE_VABSNEG_int<"vabs", "s8", 0b00, 0b0>;
-def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
-def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
-
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
- (v16i8 (MVE_VABSs8 $v))>;
- def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
- (v8i16 (MVE_VABSs16 $v))>;
- def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
- (v4i32 (MVE_VABSs32 $v))>;
-}
+multiclass MVE_VABSNEG_int_m<string iname, bit negate, bit saturate,
+ SDNode unpred_op, Intrinsic pred_int,
+ MVEVectorVTInfo VTI> {
+ def "" : MVE_VABSNEG_int<iname, VTI.Suffix, VTI.Size, negate, saturate>;
+ defvar Inst = !cast<Instruction>(NAME);
-def MVE_VNEGs8 : MVE_VABSNEG_int<"vneg", "s8", 0b00, 0b1>;
-def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
-def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
+ let Predicates = [HasMVEInt] in {
+ // VQABS and VQNEG have more difficult isel patterns defined elsewhere
+ if !eq(saturate, 0) then {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
+ }
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
- (v16i8 (MVE_VNEGs8 $v))>;
- def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
- (v8i16 (MVE_VNEGs16 $v))>;
- def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
- (v4i32 (MVE_VNEGs32 $v))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+ }
}
-class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
- bit negate, list<dag> pattern=[]>
- : MVEIntSingleSrc<iname, suffix, size, pattern> {
-
- let Inst{28} = 0b1;
- let Inst{25-23} = 0b111;
- let Inst{21-20} = 0b11;
- let Inst{17-16} = 0b00;
- let Inst{12-8} = 0b00111;
- let Inst{7} = negate;
- let Inst{6} = 0b1;
- let Inst{4} = 0b0;
- let Inst{0} = 0b0;
- let validForTailPredication = 1;
+foreach VTI = [ MVE_v16s8, MVE_v8s16, MVE_v4s32 ] in {
+ defm "MVE_VABS" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vabs", 0, 0, abs, int_arm_mve_abs_predicated, VTI>;
+ defm "MVE_VQABS" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vqabs", 0, 1, ?, int_arm_mve_qabs_predicated, VTI>;
+ defm "MVE_VNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vneg", 1, 0, vnegq, int_arm_mve_neg_predicated, VTI>;
+ defm "MVE_VQNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vqneg", 1, 1, ?, int_arm_mve_qneg_predicated, VTI>;
}
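+
+// For illustration: with VTI = MVE_v16s8, the first defm above produces a
+// def named MVE_VABSs8 together with patterns selecting both (abs v16i8)
+// and the int_arm_mve_abs_predicated intrinsic to it.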
-def MVE_VQABSs8 : MVE_VQABSNEG<"vqabs", "s8", 0b00, 0b0>;
-def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>;
-def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>;
-
-def MVE_VQNEGs8 : MVE_VQABSNEG<"vqneg", "s8", 0b00, 0b1>;
-def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
-def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
-
// int_min/int_max: vector containing INT_MIN/INT_MAX VTI.Size times
// zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
- dag zero_vec, MVE_VQABSNEG vqabs_instruction,
- MVE_VQABSNEG vqneg_instruction> {
+ dag zero_vec, MVE_VABSNEG_int vqabs_instruction,
+ MVE_VABSNEG_int vqneg_instruction> {
let Predicates = [HasMVEInt] in {
// The below tree can be replaced by a vqabs instruction, as it represents
// the following vectorized expression (r being the value in $reg):
let Inst{3-1} = Qm{2-0};
}
-class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
+class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U, bit top,
list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
iname, suffix, "$Qd, $Qm", vpred_r, "",
let Inst{21} = 0b1;
let Inst{20-19} = sz{1-0};
let Inst{18-16} = 0b000;
+ let Inst{12} = top;
let Inst{11-6} = 0b111101;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
}
-multiclass MVE_VMOVL_shift_half<string iname, string suffix, bits<2> sz, bit U,
- list<dag> pattern=[]> {
- def bh : MVE_VMOVL<!strconcat(iname, "b"), suffix, sz, U, pattern> {
- let Inst{12} = 0b0;
- }
- def th : MVE_VMOVL<!strconcat(iname, "t"), suffix, sz, U, pattern> {
- let Inst{12} = 0b1;
- }
+multiclass MVE_VMOVL_m<bit top, string chr, MVEVectorVTInfo OutVTI,
+ MVEVectorVTInfo InVTI> {
+ def "": MVE_VMOVL<"vmovl" # chr, InVTI.Suffix, OutVTI.Size,
+ InVTI.Unsigned, top>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ def : Pat<(OutVTI.Vec (int_arm_mve_vmovl_predicated (InVTI.Vec MQPR:$src),
+ (i32 InVTI.Unsigned), (i32 top),
+ (OutVTI.Pred VCCR:$pred),
+ (OutVTI.Vec MQPR:$inactive))),
+ (OutVTI.Vec (Inst (InVTI.Vec MQPR:$src), ARMVCCThen,
+ (OutVTI.Pred VCCR:$pred),
+ (OutVTI.Vec MQPR:$inactive)))>;
}
-defm MVE_VMOVLs8 : MVE_VMOVL_shift_half<"vmovl", "s8", 0b01, 0b0>;
-defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>;
-defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
-defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
+defm MVE_VMOVLs8bh : MVE_VMOVL_m<0, "b", MVE_v8s16, MVE_v16s8>;
+defm MVE_VMOVLs8th : MVE_VMOVL_m<1, "t", MVE_v8s16, MVE_v16s8>;
+defm MVE_VMOVLu8bh : MVE_VMOVL_m<0, "b", MVE_v8u16, MVE_v16u8>;
+defm MVE_VMOVLu8th : MVE_VMOVL_m<1, "t", MVE_v8u16, MVE_v16u8>;
+defm MVE_VMOVLs16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8s16>;
+defm MVE_VMOVLs16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8s16>;
+defm MVE_VMOVLu16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8u16>;
+defm MVE_VMOVLu16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8u16>;
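+
+// As a worked example (cf. the accompanying llc tests), the predicated
+// intrinsic call
+//   call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(
+//            <16 x i8> %a, i32 0 /*unsigned*/, i32 0 /*top*/,
+//            <8 x i1> %p, <8 x i16> %inactive)
+// selects to "vmovlbt.s8" via MVE_VMOVLs8bh.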
let Predicates = [HasMVEInt] in {
def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
}
-multiclass MVE_VRINT_ops<string suffix, bits<2> size, list<dag> pattern=[]> {
- def N : MVE_VRINT<"n", 0b000, suffix, size, pattern>;
- def X : MVE_VRINT<"x", 0b001, suffix, size, pattern>;
- def A : MVE_VRINT<"a", 0b010, suffix, size, pattern>;
- def Z : MVE_VRINT<"z", 0b011, suffix, size, pattern>;
- def M : MVE_VRINT<"m", 0b101, suffix, size, pattern>;
- def P : MVE_VRINT<"p", 0b111, suffix, size, pattern>;
-}
+multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
+ SDNode unpred_op> {
+ def "": MVE_VRINT<suffix, opcode, VTI.Suffix, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_vrint"#suffix#"_predicated");
-defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>;
-defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>;
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
+}
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4f32 (frint (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (frint (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (fround (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (fround (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (ffloor (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (ffloor (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (fceil (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (int_arm_mve_vrintn (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32N (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (int_arm_mve_vrintn (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16N (v8f16 MQPR:$val1)))>;
+multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {
+ defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>;
+ defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
+ defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
+ defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;
+ defm M : MVE_VRINT_m<VTI, "m", 0b101, ffloor>;
+ defm P : MVE_VRINT_m<VTI, "p", 0b111, fceil>;
}
+defm MVE_VRINTf16 : MVE_VRINT_ops<MVE_v8f16>;
+defm MVE_VRINTf32 : MVE_VRINT_ops<MVE_v4f32>;
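+// For example, MVE_VRINTf16A here is the VRINTA encoding for v8f16; its
+// predicated pattern uses int_arm_mve_vrinta_predicated, which MVE_VRINT_m
+// looks up by name via !cast.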
+
class MVEFloatArithNeon<string iname, string suffix, bit size,
dag oops, dag iops, string ops,
vpred_ops vpred, string cstr, list<dag> pattern=[]>
defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_multi<"s32.f32", 0b10, 0b0>;
defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_multi<"u32.f32", 0b10, 0b1>;
-class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
+class MVE_VCVT_fp_int<string suffix, bits<2> size, bit toint, bit unsigned,
list<dag> pattern=[]>
: MVE_float<"vcvt", suffix, (outs MQPR:$Qd),
(ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
let Inst{17-16} = 0b11;
let Inst{15-13} = Qd{2-0};
let Inst{12-9} = 0b0011;
- let Inst{8-7} = op;
+ let Inst{8} = toint;
+ let Inst{7} = unsigned;
let Inst{4} = 0b0;
let validForTailPredication = 1;
}
+multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
+ SDNode unpred_op> {
+ defvar Unsigned = !or(!eq(Dest.SuffixLetter,"u"), !eq(Src.SuffixLetter,"u"));
+ defvar ToInt = !eq(Src.SuffixLetter,"f");
+
+ def "" : MVE_VCVT_fp_int<Dest.Suffix # "." # Src.Suffix, Dest.Size,
+ ToInt, Unsigned>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(Dest.Vec (unpred_op (Src.Vec MQPR:$src))),
+ (Dest.Vec (Inst (Src.Vec MQPR:$src)))>;
+ def : Pat<(Dest.Vec (int_arm_mve_vcvt_fp_int_predicated
+ (Src.Vec MQPR:$src), (i32 Unsigned),
+ (Src.Pred VCCR:$mask), (Dest.Vec MQPR:$inactive))),
+ (Dest.Vec (Inst (Src.Vec MQPR:$src), ARMVCCThen,
+ (Src.Pred VCCR:$mask),
+ (Dest.Vec MQPR:$inactive)))>;
+ }
+}
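+
+// For instance, MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint> derives
+// Unsigned = 1 and ToInt = 1 from the suffix letters, giving the
+// "vcvt.u32.f32" encoding used by MVE_VCVTu32f32z below.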
// The unsuffixed VCVT for float->int implicitly rounds toward zero,
// which I reflect here in the llvm instruction names
-def MVE_VCVTs16f16z : MVE_VCVT_fp_int<"s16.f16", 0b01, 0b10>;
-def MVE_VCVTu16f16z : MVE_VCVT_fp_int<"u16.f16", 0b01, 0b11>;
-def MVE_VCVTs32f32z : MVE_VCVT_fp_int<"s32.f32", 0b10, 0b10>;
-def MVE_VCVTu32f32z : MVE_VCVT_fp_int<"u32.f32", 0b10, 0b11>;
+defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m<MVE_v8s16, MVE_v8f16, fp_to_sint>;
+defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m<MVE_v8u16, MVE_v8f16, fp_to_uint>;
+defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m<MVE_v4s32, MVE_v4f32, fp_to_sint>;
+defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint>;
// Whereas VCVT for int->float rounds to nearest
-def MVE_VCVTf16s16n : MVE_VCVT_fp_int<"f16.s16", 0b01, 0b00>;
-def MVE_VCVTf16u16n : MVE_VCVT_fp_int<"f16.u16", 0b01, 0b01>;
-def MVE_VCVTf32s32n : MVE_VCVT_fp_int<"f32.s32", 0b10, 0b00>;
-def MVE_VCVTf32u32n : MVE_VCVT_fp_int<"f32.u32", 0b10, 0b01>;
-
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))),
- (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>;
- def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))),
- (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>;
- def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))),
- (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>;
- def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))),
- (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>;
- def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))),
- (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>;
- def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))),
- (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>;
- def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))),
- (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>;
- def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))),
- (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>;
-}
+defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8s16, sint_to_fp>;
+defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp>;
+defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp>;
+defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp>;
class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
list<dag> pattern=[]>
let validForTailPredication = 1;
}
-def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>;
-def MVE_VABSf32 : MVE_VABSNEG_fp<"vabs", "f32", 0b10, 0b0>;
-
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v8f16 (fabs MQPR:$src)),
- (MVE_VABSf16 MQPR:$src)>;
- def : Pat<(v4f32 (fabs MQPR:$src)),
- (MVE_VABSf32 MQPR:$src)>;
-}
+multiclass MVE_VABSNEG_fp_m<string iname, SDNode unpred_op, Intrinsic pred_int,
+ MVEVectorVTInfo VTI, bit opcode> {
+ def "" : MVE_VABSNEG_fp<iname, VTI.Suffix, VTI.Size, opcode>;
+ defvar Inst = !cast<Instruction>(NAME);
-def MVE_VNEGf16 : MVE_VABSNEG_fp<"vneg", "f16", 0b01, 0b1>;
-def MVE_VNEGf32 : MVE_VABSNEG_fp<"vneg", "f32", 0b10, 0b1>;
+  let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v8f16 (fneg MQPR:$src)),
- (MVE_VNEGf16 MQPR:$src)>;
- def : Pat<(v4f32 (fneg MQPR:$src)),
- (MVE_VNEGf32 MQPR:$src)>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+ }
}
+defm MVE_VABSf16 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated,
+ MVE_v8f16, 0>;
+defm MVE_VABSf32 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated,
+ MVE_v4f32, 0>;
+defm MVE_VNEGf16 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
+ MVE_v8f16, 1>;
+defm MVE_VNEGf32 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
+ MVE_v4f32, 1>;
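+// The trailing bit argument ('opcode') feeds the class's 'negate' field:
+// 0 selects the VABS encoding, 1 the VNEG encoding.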
+
class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
list<dag> pattern=[]>
: MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>;
def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>;
-let Predicates = [HasMVEInt] in {
- def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
- (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
- def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
- (v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
- def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))),
- (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
- def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))),
- (v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
-
- def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qm),
- (v8i16 (ARMvrev32 MQPR:$Qd_src)), (i32 1))),
- (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
- def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qm),
- (v16i8 (ARMvrev16 MQPR:$Qd_src)), (i32 1))),
- (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
-}
+
+multiclass MVE_VMOVN_p<Instruction Inst, bit top,
+ MVEVectorVTInfo VTI, MVEVectorVTInfo InVTI> {
+ // Match the most obvious MVEvmovn(a,b,t), which overwrites the odd or even
+ // lanes of a (depending on t) with the even lanes of b.
+ def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qd_src),
+ (VTI.Vec MQPR:$Qm), (i32 top))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
+
+ if !eq(top, 0) then {
+ // If we see MVEvmovn(a,ARMvrev(b),1), that wants to overwrite the odd
+ // lanes of a with the odd lanes of b. In other words, the lanes we're
+ // _keeping_ from a are the even ones. So we can flip it round and say that
+ // this is the same as overwriting the even lanes of b with the even lanes
+ // of a, i.e. it's a VMOVNB with the operands reversed.
+ defvar vrev = !cast<SDNode>("ARMvrev" # InVTI.LaneBits);
+ def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (vrev MQPR:$Qd_src)), (i32 1))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
+ }
+
+ // Match the IR intrinsic for a predicated VMOVN. This regards the Qm input
+ // as having wider lanes that we're narrowing, instead of already-narrow
+ // lanes that we're taking every other one of.
+ def : Pat<(VTI.Vec (int_arm_mve_vmovn_predicated (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm), (i32 top),
+ (InVTI.Pred VCCR:$pred))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm),
+ ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
+}
+
+defm : MVE_VMOVN_p<MVE_VMOVNi32bh, 0, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VMOVN_p<MVE_VMOVNi32th, 1, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VMOVN_p<MVE_VMOVNi16bh, 0, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VMOVN_p<MVE_VMOVNi16th, 1, MVE_v16i8, MVE_v8i16>;
class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmvnq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmvnt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmvnt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmvnt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmvnq_m_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmvnt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmvnt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vmvnt q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vnegq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vnegt.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vnegq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vnegt.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vnegq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vnegt.s8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vnegq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vnegt.s16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vnegq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vnegt.s32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vabsq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vabst.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vabsq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vabst.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vabsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vabst.s8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vabsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vabst.s16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vabsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vabst.s32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqnegq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqnegq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqnegt.s8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqnegq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqnegq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqnegt.s16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqnegq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqnegq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqnegt.s32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqabsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqabsq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqabst.s8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqabsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqabsq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqabst.s16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqabsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqabsq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vqabst.s32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+
+declare <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vclsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclsq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclst.s8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vclsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclsq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclst.s16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vclsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclsq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclst.s32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vclzq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclzt.i8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vclzq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclzt.i16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vclzq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclzt.i32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vclzq_m_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclzt.i8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vclzq_m_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclzt.i16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vclzq_m_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vclzt.i32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_f16_s16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_f16_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.f16.s16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> %a, i32 0, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_f16_u16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_f16_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.f16.u16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> %a, i32 1, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_f32_s32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_f32_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.f32.s32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_f32_u32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_f32_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.f32.u32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> %a, i32 1, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_s16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_s16_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.s16.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> %a, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_s32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_s32_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.s32.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> %a, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_u16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_u16_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.u16.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> %a, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_u32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_u32_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcvtt.u32.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> %a, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+
+declare <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x float>)
+declare <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half>, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float>, i32, <4 x i1>, <4 x i32>)
%1 = zext <4 x i16> %0 to <4 x i32>
ret <4 x i32> %1
}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovlbq_m_s8:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovlbt.s8 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovlbq_m_s8:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q0
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovlbt.s8 q2, q0
+; BE-NEXT: vrev64.16 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovlbq_m_s16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovlbt.s16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovlbq_m_s16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q0
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovlbt.s16 q2, q0
+; BE-NEXT: vrev64.32 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovlbq_m_u8:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovlbt.u8 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovlbq_m_u8:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q0
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovlbt.u8 q2, q0
+; BE-NEXT: vrev64.16 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovlbq_m_u16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovlbt.u16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovlbq_m_u16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q0
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovlbt.u16 q2, q0
+; BE-NEXT: vrev64.32 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovltq_m_s8:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovltt.s8 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovltq_m_s8:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q0
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovltt.s8 q2, q0
+; BE-NEXT: vrev64.16 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovltq_m_s16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovltt.s16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovltq_m_s16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q0
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovltt.s16 q2, q0
+; BE-NEXT: vrev64.32 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovltq_m_u8:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovltt.u8 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovltq_m_u8:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q0
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovltt.u8 q2, q0
+; BE-NEXT: vrev64.16 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovltq_m_u16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovltt.u16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovltq_m_u16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q0
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovltt.u16 q2, q0
+; BE-NEXT: vrev64.32 q0, q2
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8>, i32, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16>, i32, i32, <4 x i1>, <4 x i32>)
ret <8 x i16> %2
}
+define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovnbq_m_s16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovnbt.i16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovnbq_m_s16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q1
+; BE-NEXT: vrev64.8 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovnbt.i16 q1, q2
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, <8 x i1> %1)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovnbq_m_s32:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovnbt.i32 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovnbq_m_s32:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q1
+; BE-NEXT: vrev64.16 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovnbt.i32 q1, q2
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovnbq_m_u16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovnbt.i16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovnbq_m_u16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q1
+; BE-NEXT: vrev64.8 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovnbt.i16 q1, q2
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, <8 x i1> %1)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovnbq_m_u32:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovnbt.i32 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovnbq_m_u32:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q1
+; BE-NEXT: vrev64.16 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovnbt.i32 q1, q2
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovntq_m_s16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovntt.i16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovntq_m_s16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q1
+; BE-NEXT: vrev64.8 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovntt.i16 q1, q2
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, <8 x i1> %1)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovntq_m_s32:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovntt.i32 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovntq_m_s32:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q1
+; BE-NEXT: vrev64.16 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovntt.i32 q1, q2
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, <4 x i1> %1)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovntq_m_u16:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovntt.i16 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovntq_m_u16:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.16 q2, q1
+; BE-NEXT: vrev64.8 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovntt.i16 q1, q2
+; BE-NEXT: vrev64.8 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, <8 x i1> %1)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovntq_m_u32:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: vmsr p0, r0
+; LE-NEXT: vpst
+; LE-NEXT: vmovntt.i32 q0, q1
+; LE-NEXT: bx lr
+;
+; BE-LABEL: test_vmovntq_m_u32:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: vrev64.32 q2, q1
+; BE-NEXT: vrev64.16 q1, q0
+; BE-NEXT: vmsr p0, r0
+; BE-NEXT: vpst
+; BE-NEXT: vmovntt.i32 q1, q2
+; BE-NEXT: vrev64.16 q0, q1
+; BE-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, <4 x i1> %1)
+ ret <8 x i16> %2
+}
+
declare <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8>)
declare <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16>)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8>, <8 x i16>, i32, <8 x i1>)
+declare <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16>, <4 x i32>, i32, <4 x i1>)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrev16q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev16q_m_i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev16t.8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 16, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrev32q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev32q_m_i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev32t.8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 32, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrev32q_m_i16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev32q_m_i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev32t.16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> %a, i32 32, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrev32q_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev32q_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev32t.16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> %a, i32 32, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrev64q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev64t.8 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 64, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrev64q_m_i16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev64t.16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> %a, i32 64, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrev64q_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev64t.16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> %a, i32 64, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrev64q_m_i32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev64t.32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> %a, i32 64, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrev64q_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrev64t.32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> %a, i32 64, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+
+declare <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
+declare <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half>, i32, <8 x i1>, <8 x half>)
+declare <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)
+declare <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float>, i32, <4 x i1>, <4 x float>)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndaq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndaq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintat.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndaq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndaq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintat.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndmq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndmq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintmt.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndmq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndmq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintmt.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndnq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndnq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintnt.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndnq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndnq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintnt.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndpq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndpq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintpt.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndpq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndpq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintpt.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintzt.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintzt.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndxq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndxq_m_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintxt.f16 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+ ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndxq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndxq_m_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrintxt.f32 q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+ ret <4 x float> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)