From 9eb3cc10b2c6f78ccf033cb264113fc904651cd0 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham@arm.com>
Date: Wed, 26 Feb 2020 14:10:43 +0000
Subject: [PATCH] [ARM,MVE] Add predicated intrinsics for many unary functions.

Summary:
This commit adds the predicated MVE intrinsics for the same set of
unary operations that I added in their unpredicated forms in

* D74333 (vrint)
* D74334 (vrev)
* D74335 (vclz, vcls)
* D74336 (vmovl)
* D74337 (vmovn)

but since the predicated versions are a lot more similar to each
other, I've kept them all together in a single big patch. Everything
here is done in the standard way we've been doing other predicated
operations: an IR intrinsic called `@llvm.arm.mve.foo.predicated` and
some isel rules that match that alongside whatever they accept for the
unpredicated version of the same instruction.

In order to write the isel rules conveniently, I've refactored the
existing isel rules for the affected instructions into multiclasses
parametrised by a vector-type class, in the usual way. All those
refactorings are intended to leave the existing isel rules unchanged:
the only difference should be that new ones for the predicated
intrinsics are introduced.

The only tiny infrastructure change I needed in this commit was to
change the implementation of `IntrinsicMX` in `arm_mve_defs.td` so
that the records it defines are anonymous rather than named (and use
`NameOverride` to set the output intrinsic name), which allows me to
call it twice in two multiclasses with the same `NAME` without a
tablegen-time error.

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: MarkMurrayARM

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75165
---
 clang/include/clang/Basic/arm_mve.td          | 139 ++--
 clang/include/clang/Basic/arm_mve_defs.td     |   8 +-
 .../test/CodeGen/arm-mve-intrinsics/absneg.c  | 615 ++++++++++++++++++
 clang/test/CodeGen/arm-mve-intrinsics/vclz.c  | 287 ++++++++
 clang/test/CodeGen/arm-mve-intrinsics/vcvt.c  | 240 +++++++
 clang/test/CodeGen/arm-mve-intrinsics/vmovl.c | 256 ++++++++
 clang/test/CodeGen/arm-mve-intrinsics/vmovn.c | 184 ++++++
 clang/test/CodeGen/arm-mve-intrinsics/vrev.c  | 480 ++++++++++++++
 clang/test/CodeGen/arm-mve-intrinsics/vrnd.c  | 385 +++++++++++
 llvm/include/llvm/IR/IntrinsicsARM.td         |  33 +
 llvm/lib/Target/ARM/ARMInstrMVE.td            | 419 ++++++------
 .../mve-intrinsics/absneg-predicated.ll       | 335 ++++++++++
 .../mve-intrinsics/vclzcls-predicated.ll      | 138 ++++
 .../Thumb2/mve-intrinsics/vcvt-fp-int.ll      | 122 ++++
 .../CodeGen/Thumb2/mve-intrinsics/vmovl.ll    | 197 ++++++
 .../CodeGen/Thumb2/mve-intrinsics/vmovn.ll    | 196 ++++++
 .../CodeGen/Thumb2/mve-intrinsics/vrev.ll     | 138 ++++
 .../Thumb2/mve-intrinsics/vrint-predicated.ll | 185 ++++++
 18 files changed, 4107 insertions(+), 250 deletions(-)
 create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/absneg-predicated.ll
 create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vclzcls-predicated.ll
 create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt-fp-int.ll
 create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vrev.ll
 create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vrint-predicated.ll

diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index 7150852d7004..efc6be1158b8 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -237,11 +237,18 @@ let params = T.Unsigned in {
 let params = T.Int in {
   def vmvnq: Intrinsic<Vector, (args Vector:$a),
                        (xor $a, (uint_max Vector))>;
+  defm vmvnq: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+    (IRInt<"mvn_predicated", [Vector, Predicate]> $a, $pred, $inactive)>;
   def vclzq: Intrinsic<Vector, (args Vector:$a),
                        (IRIntBase<"ctlz", [Vector]> $a, (i1 0))>;
+  defm vclzq: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+    (IRInt<"clz_predicated", [Vector, Predicate]> $a, $pred, $inactive)>;
 }
 let params = T.Signed in {
   def vclsq: Intrinsic<Vector, (args Vector:$a), (IRInt<"vcls", [Vector]> $a)>;
+  defm vclsq: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+    (IRInt<"cls_predicated", [Vector, Predicate]> $a, $pred, $inactive)>;
+
   def vnegq: Intrinsic<Vector, (args Vector:$a),
                        (sub (zeroinit Vector), $a)>;
   def vabsq: Intrinsic<Vector, (args Vector:$a),
@@ -256,6 +263,18 @@ let params = T.Signed in {
                                 (select (icmp_eq $a, (int_min Vector)),
                                         (int_max Vector),
                                         (sub (zeroinit Vector), $a)))>;
+
+  foreach name = ["qneg", "qabs"] in {
+    defm v#name#q: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+      (IRInt<name#"_predicated", [Vector, Predicate]> $a, $pred, $inactive),
+      0 /* no _x variant for saturating intrinsics */>;
+  }
+}
+let params = !listconcat(T.Signed, T.Float) in {
+  foreach name = ["neg", "abs"] in {
+    defm v#name#q: IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+      (IRInt<name#"_predicated", [Vector, Predicate]> $a, $pred, $inactive)>;
+  }
 }
 let params = T.Float in {
   def vnegq_f: Intrinsic<Vector, (args Vector:$a), (fneg $a)>,
@@ -440,49 +459,77 @@ multiclass float_int_conversions<Type FScalar, Type IScalar, IRBuilderBase ftoi,
   defvar FVector = VecOf<FScalar>;
   defvar IVector = VecOf<IScalar>;
 
-  let params = [IScalar], pnt = PNT_2Type in
-    def : Intrinsic<FVector, (args IVector:$a), (itof $a, FVector)>,
-          NameOverride<"vcvtq_" # FScalar>;
-  let params = [FScalar], pnt = PNT_None in
-    def : Intrinsic<IVector, (args FVector:$a), (ftoi $a, IVector)>,
-          NameOverride<"vcvtq_" # IScalar>;
+  let params = [IScalar] in {
+    let pnt = PNT_2Type in {
+      def : Intrinsic<FVector, (args IVector:$a), (itof $a, FVector)>,
+            NameOverride<"vcvtq_" # FScalar>;
+    }
+    defm vcvtq: IntrinsicMX<FVector, (args IVector:$a, Predicate:$pred),
+        (IRInt<"vcvt_fp_int_predicated", [FVector, IVector, Predicate]>
+            $a, (unsignedflag IScalar), $pred, $inactive),
+        1, "_" # FScalar, PNT_2Type, PNT_2Type>;
+  }
+  let params = [FScalar] in {
+    let pnt = PNT_None in {
+      def : Intrinsic<IVector, (args FVector:$a), (ftoi $a, IVector)>,
+            NameOverride<"vcvtq_" # IScalar>;
+    }
+    defm vcvtq: IntrinsicMX<IVector, (args FVector:$a, Predicate:$pred),
+        (IRInt<"vcvt_fp_int_predicated", [IVector, FVector, Predicate]>
+            $a, (unsignedflag IScalar), $pred, $inactive),
+        1, "_" # IScalar, PNT_2Type, PNT_None>;
+  }
 }
 
-defm : float_int_conversions<f32, u32, fptoui, uitofp>;
-defm : float_int_conversions<f16, u16, fptoui, uitofp>;
-defm : float_int_conversions<f32, s32, fptosi, sitofp>;
-defm : float_int_conversions<f16, s16, fptosi, sitofp>;
+defm "" : float_int_conversions<f32, u32, fptoui, uitofp>;
+defm "" : float_int_conversions<f16, u16, fptoui, uitofp>;
+defm "" : float_int_conversions<f32, s32, fptosi, sitofp>;
+defm "" : float_int_conversions<f16, s16, fptosi, sitofp>;
 
-let params = [s8, u8, s16, u16] in {
-  def vmovlbq: Intrinsic<DblVector, (args Vector:$a),
-    (extend (unzip $a, 0), DblVector, (unsignedflag Scalar))>;
-  def vmovltq: Intrinsic<DblVector, (args Vector:$a),
-    (extend (unzip $a, 1), DblVector, (unsignedflag Scalar))>;
+multiclass vmovl<bit top> {
+  let params = [s8, u8, s16, u16] in {
+    def "": Intrinsic<DblVector, (args Vector:$a),
+      (extend (unzip $a, top), DblVector, (unsignedflag Scalar))>;
+    defm "": IntrinsicMX<DblVector, (args Vector:$a, DblPredicate:$pred),
+        (IRInt<"vmovl_predicated", [DblVector, Vector, DblPredicate]>
+         $a, (unsignedflag Scalar), top, $pred, $inactive)>;
+  }
 }
 
-let params = [s16, u16, s32, u32] in {
-  def vmovnbq: Intrinsic<HalfVector, (args HalfVector:$inactive, Vector:$a),
-    (trunc (zip $a, (vreinterpret (vrev $inactive, (bitsize Scalar)), Vector)),
-           HalfVector)>;
-  def vmovntq: Intrinsic<HalfVector, (args HalfVector:$inactive, Vector:$a),
-    (trunc (zip (vreinterpret $inactive, Vector), $a), HalfVector)>;
+defm vmovlbq: vmovl<0>;
+defm vmovltq: vmovl<1>;
+
+multiclass vmovn<bit top, dag wide_result> {
+  let params = [s16, u16, s32, u32] in {
+    def "": Intrinsic<HalfVector, (args HalfVector:$inactive, Vector:$a),
+      (trunc wide_result, HalfVector)>;
+    def _m: Intrinsic<HalfVector, (args HalfVector:$inactive, Vector:$a,
+                                        Predicate:$pred),
+      (IRInt<"vmovn_predicated", [HalfVector, Vector, Predicate]>
+          $inactive, $a, top, $pred)>;
+  }
 }
 
-let params = T.Float in {
-  def vrndq: Intrinsic<Vector, (args Vector:$a),
-      (IRIntBase<"trunc", [Vector]> $a)>;
-  def vrndmq: Intrinsic<Vector, (args Vector:$a),
-      (IRIntBase<"floor", [Vector]> $a)>;
-  def vrndpq: Intrinsic<Vector, (args Vector:$a),
-      (IRIntBase<"ceil", [Vector]> $a)>;
-  def vrndaq: Intrinsic<Vector, (args Vector:$a),
-      (IRIntBase<"round", [Vector]> $a)>;
-  def vrndxq: Intrinsic<Vector, (args Vector:$a),
-      (IRIntBase<"rint", [Vector]> $a)>;
-  def vrndnq: Intrinsic<Vector, (args Vector:$a),
-      (IRInt<"vrintn", [Vector]> $a)>;
+defm vmovntq: vmovn<1, (zip (vreinterpret $inactive, Vector), $a)>;
+defm vmovnbq: vmovn<0,
+   (zip $a, (vreinterpret (vrev $inactive, (bitsize Scalar)), Vector))>;
+
+multiclass vrnd<IRIntBase ir_int, string suffix> {
+  let params = T.Float in {
+    def "": Intrinsic<Vector, (args Vector:$a), (ir_int $a)>;
+    defm "": IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+        (IRInt<"vrint"#suffix#"_predicated", [Vector, Predicate]>
+            $a, $pred, $inactive)>;
+  }
 }
 
+defm vrndq:  vrnd<IRIntBase<"trunc", [Vector]>, "z">;
+defm vrndmq: vrnd<IRIntBase<"floor", [Vector]>, "m">;
+defm vrndpq: vrnd<IRIntBase<"ceil",  [Vector]>, "p">;
+defm vrndaq: vrnd<IRIntBase<"round", [Vector]>, "a">;
+defm vrndxq: vrnd<IRIntBase<"rint",  [Vector]>, "x">;
+defm vrndnq: vrnd<IRInt<"vrintn",    [Vector]>, "n">;
+
 multiclass compare_with_pred<string condname, dag arguments,
                              dag cmp, string suffix> {
   // Make the predicated and unpredicated versions of a single comparison.
@@ -1231,12 +1278,24 @@ defm vrmlsldavh : MVEBinaryVectorHoriz64R<V.True, V.False, "">;
 defm vrmlsldavh : MVEBinaryVectorHoriz64R<V.True, V.True, "x">;
 }
 
-let params = T.All8 in
-def vrev16q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 16)>;
-let params = !listconcat(T.All8, T.All16) in
-def vrev32q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 32)>;
-let params = T.Usual in
-def vrev64q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 64)>;
+multiclass vrev_predicated<int revsize> {
+  defm "" : IntrinsicMX<Vector, (args Vector:$a, Predicate:$pred),
+      (IRInt<"vrev_predicated", [Vector, Predicate]>
+          $a, revsize, $pred, $inactive)>;
+}
+
+let params = T.All8 in {
+  def vrev16q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 16)>;
+  defm vrev16q: vrev_predicated<16>;
+}
+let params = !listconcat(T.All8, T.All16) in {
+  def vrev32q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 32)>;
+  defm vrev32q: vrev_predicated<32>;
+}
+let params = T.Usual in {
+  def vrev64q : Intrinsic<Vector, (args Vector:$a), (vrev $a, 64)>;
+  defm vrev64q: vrev_predicated<64>;
+}
 
 foreach desttype = T.All in {
   // We want a vreinterpretq between every pair of supported vector types
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 776dc9c73da4..dbcad78cce75 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -506,8 +506,8 @@ multiclass IntrinsicMX<Type rettype, dag arguments, dag cg,
   // provides the input value of the output register, i.e. all the
   // inactive lanes in the predicated operation take their values from
   // this.
-  def "_m" # nameSuffix:
-     Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg> {
+  def : Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg>,
+        NameOverride<NAME # "_m" # nameSuffix> {
     let pnt = pnt_m;
   }
 
@@ -515,8 +515,8 @@ multiclass IntrinsicMX<Type rettype, dag arguments, dag cg,
     // The _x variant leaves off that parameter, and simply uses an
     // undef value of the same type.
 
-    def "_x" # nameSuffix:
-      Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)> {
+    def : Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)>,
+          NameOverride<NAME # "_x" # nameSuffix> {
       let pnt = pnt_x;
     }
   }
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
index db4253f3590b..94339c834809 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
@@ -164,6 +164,198 @@ uint32x4_t test_vmvnq_u32(uint32x4_t a)
 #endif /* POLYMORPHIC */
 }
 
+// CHECK-LABEL: @test_vmvnq_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vmvnq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmvnq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmvnq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmvnq_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmvnq_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmvnq_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_u32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vmvnq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmvnq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmvnq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmvnq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmvnq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmvnq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmvnq_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmvnq_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmvnq_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmvnq_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmvnq_x_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmvnq_x_u32(a, p);
+#endif /* POLYMORPHIC */
+}
+
 // CHECK-LABEL: @test_vnegq_f16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = fneg <8 x half> [[A:%.*]]
@@ -335,4 +527,427 @@ int32x4_t test_vqnegq_s32(int32x4_t a)
     return vqnegq_s32(a);
 #endif /* POLYMORPHIC */
 }
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vnegq_m_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vnegq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vnegq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_m_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vnegq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vnegq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vnegq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vnegq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_m_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vnegq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vnegq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_m_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vnegq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vnegq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vnegq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+    return vnegq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vnegq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+    return vnegq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vnegq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+    return vnegq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vnegq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+    return vnegq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_x_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vnegq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vnegq_x(a, p);
+#else /* POLYMORPHIC */
+    return vnegq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vabsq_m_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vabsq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vabsq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_m_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vabsq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vabsq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vabsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vabsq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_m_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vabsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vabsq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_m_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vabsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vabsq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vabsq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+    return vabsq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vabsq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+    return vabsq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vabsq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+    return vabsq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vabsq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+    return vabsq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_x_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vabsq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vabsq_x(a, p);
+#else /* POLYMORPHIC */
+    return vabsq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vqnegq_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqnegq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vqnegq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqnegq_m_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqnegq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vqnegq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqnegq_m_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vqnegq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqnegq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vqnegq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vqabsq_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqabsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vqabsq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqabsq_m_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqabsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vqabsq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqabsq_m_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vqabsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqabsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vqabsq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
 
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vclz.c b/clang/test/CodeGen/arm-mve-intrinsics/vclz.c
index 7a2ebe0a627a..b39ac36eb340 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vclz.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vclz.c
@@ -130,3 +130,290 @@ int32x4_t test_vclsq_s32(int32x4_t a)
 #endif /* POLYMORPHIC */
 }
 
+// CHECK-LABEL: @test_vclsq_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vclsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vclsq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_m_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vclsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vclsq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_m_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vclsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclsq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vclsq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vclzq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vclzq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vclzq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vclzq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vclzq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vclzq_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vclzq_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vclzq_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vclzq_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vclzq_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_m_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vclzq_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclzq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vclzq_m_u32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_x_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vclsq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclsq_x(a, p);
+#else /* POLYMORPHIC */
+    return vclsq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_x_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vclsq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclsq_x(a, p);
+#else /* POLYMORPHIC */
+    return vclsq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclsq_x_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vclsq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclsq_x(a, p);
+#else /* POLYMORPHIC */
+    return vclsq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vclzq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+    return vclzq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vclzq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+    return vclzq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vclzq_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+    return vclzq_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vclzq_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+    return vclzq_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vclzq_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+    return vclzq_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_x_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vclzq_x_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vclzq_x(a, p);
+#else /* POLYMORPHIC */
+    return vclzq_x_u32(a, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
index 3220100d7b89..0391b77e365f 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -100,6 +100,246 @@ uint32x4_t test_vcvtq_u32_f32(float32x4_t a)
     return vcvtq_u32_f32(a);
 }
 
+// CHECK-LABEL: @test_vcvtq_m_f16_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvtq_m_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vcvtq_m_f16_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_f16_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvtq_m_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vcvtq_m_f16_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_f32_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtq_m_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vcvtq_m_f32_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_f32_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtq_m_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vcvtq_m_f32_u32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_s16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vcvtq_m_s16_f16(int16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vcvtq_m_s16_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_s32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vcvtq_m_s32_f32(int32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vcvtq_m_s32_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_u16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vcvtq_m_u16_f16(uint16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vcvtq_m_u16_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_m_u32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vcvtq_m_u32_f32(uint32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vcvtq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vcvtq_m_u32_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_f16_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvtq_x_f16_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vcvtq_x(a, p);
+#else /* POLYMORPHIC */
+    return vcvtq_x_f16_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_f16_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcvtq_x_f16_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vcvtq_x(a, p);
+#else /* POLYMORPHIC */
+    return vcvtq_x_f16_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_f32_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtq_x_f32_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vcvtq_x(a, p);
+#else /* POLYMORPHIC */
+    return vcvtq_x_f32_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_f32_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtq_x_f32_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vcvtq_x(a, p);
+#else /* POLYMORPHIC */
+    return vcvtq_x_f32_u32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_x_s16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vcvtq_x_s16_f16(float16x8_t a, mve_pred16_t p)
+{
+    return vcvtq_x_s16_f16(a, p);
+}
+
+// CHECK-LABEL: @test_vcvtq_x_s32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vcvtq_x_s32_f32(float32x4_t a, mve_pred16_t p)
+{
+    return vcvtq_x_s32_f32(a, p);
+}
+
+// CHECK-LABEL: @test_vcvtq_x_u16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vcvtq_x_u16_f16(float16x8_t a, mve_pred16_t p)
+{
+    return vcvtq_x_u16_f16(a, p);
+}
+
+// CHECK-LABEL: @test_vcvtq_x_u32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vcvtq_x_u32_f32(float32x4_t a, mve_pred16_t p)
+{
+    return vcvtq_x_u32_f32(a, p);
+}
+
 // CHECK-LABEL: @test_vcvttq_f16_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1)
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c b/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c
index 0b8ef596faed..e66e67c49976 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c
@@ -124,3 +124,259 @@ uint32x4_t test_vmovltq_u16(uint16x8_t a)
 #endif /* POLYMORPHIC */
 }
 
+// CHECK-LABEL: @test_vmovlbq_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovlbq_m_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovlbq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmovlbq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_m_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmovlbq_m_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovlbq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmovlbq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_m_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovlbq_m_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovlbq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmovlbq_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_m_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmovlbq_m_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovlbq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmovlbq_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovltq_m_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovltq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmovltq_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_m_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmovltq_m_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovltq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmovltq_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_m_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovltq_m_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovltq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmovltq_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_m_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmovltq_m_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovltq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vmovltq_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_x_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovlbq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovlbq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmovlbq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_x_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmovlbq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovlbq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmovlbq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_x_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovlbq_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovlbq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmovlbq_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovlbq_x_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmovlbq_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovlbq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmovlbq_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_x_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovltq_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovltq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmovltq_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_x_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmovltq_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovltq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmovltq_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_x_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovltq_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovltq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmovltq_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmovltq_x_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmovltq_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovltq_x(a, p);
+#else /* POLYMORPHIC */
+    return vmovltq_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c b/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c
index 5d157de0feb8..ed414b52ade8 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c
@@ -197,3 +197,187 @@ uint16x8_t test_vmovntq_u32(uint16x8_t a, uint32x4_t b)
     return vmovntq_u32(a, b);
 #endif /* POLYMORPHIC */
 }
+
+// LE-LABEL: @test_vmovnbq_m_s16(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// LE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
+// LE-NEXT:    ret <16 x i8> [[TMP2]]
+//
+// BE-LABEL: @test_vmovnbq_m_s16(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// BE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
+// BE-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vmovnbq_m_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovnbq_m(a, b, p);
+#else /* POLYMORPHIC */
+    return vmovnbq_m_s16(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovnbq_m_s32(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// LE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// LE-NEXT:    ret <8 x i16> [[TMP2]]
+//
+// BE-LABEL: @test_vmovnbq_m_s32(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// BE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// BE-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovnbq_m_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovnbq_m(a, b, p);
+#else /* POLYMORPHIC */
+    return vmovnbq_m_s32(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovnbq_m_u16(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// LE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
+// LE-NEXT:    ret <16 x i8> [[TMP2]]
+//
+// BE-LABEL: @test_vmovnbq_m_u16(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// BE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]])
+// BE-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmovnbq_m_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovnbq_m(a, b, p);
+#else /* POLYMORPHIC */
+    return vmovnbq_m_u16(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovnbq_m_u32(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// LE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// LE-NEXT:    ret <8 x i16> [[TMP2]]
+//
+// BE-LABEL: @test_vmovnbq_m_u32(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// BE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
+// BE-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovnbq_m_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovnbq_m(a, b, p);
+#else /* POLYMORPHIC */
+    return vmovnbq_m_u32(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovntq_m_s16(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// LE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// LE-NEXT:    ret <16 x i8> [[TMP2]]
+//
+// BE-LABEL: @test_vmovntq_m_s16(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// BE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// BE-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vmovntq_m_s16(int8x16_t a, int16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovntq_m(a, b, p);
+#else /* POLYMORPHIC */
+    return vmovntq_m_s16(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovntq_m_s32(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// LE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// LE-NEXT:    ret <8 x i16> [[TMP2]]
+//
+// BE-LABEL: @test_vmovntq_m_s32(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// BE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// BE-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmovntq_m_s32(int16x8_t a, int32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovntq_m(a, b, p);
+#else /* POLYMORPHIC */
+    return vmovntq_m_s32(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovntq_m_u16(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// LE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// LE-NEXT:    ret <16 x i8> [[TMP2]]
+//
+// BE-LABEL: @test_vmovntq_m_u16(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// BE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]])
+// BE-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vmovntq_m_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovntq_m(a, b, p);
+#else /* POLYMORPHIC */
+    return vmovntq_m_u16(a, b, p);
+#endif /* POLYMORPHIC */
+}
+
+// LE-LABEL: @test_vmovntq_m_u32(
+// LE-NEXT:  entry:
+// LE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// LE-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// LE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// LE-NEXT:    ret <8 x i16> [[TMP2]]
+//
+// BE-LABEL: @test_vmovntq_m_u32(
+// BE-NEXT:  entry:
+// BE-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// BE-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// BE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
+// BE-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmovntq_m_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmovntq_m(a, b, p);
+#else /* POLYMORPHIC */
+    return vmovntq_m_u32(a, b, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrev.c b/clang/test/CodeGen/arm-mve-intrinsics/vrev.c
index 384d736d2a6d..73675cc005b2 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vrev.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vrev.c
@@ -213,3 +213,483 @@ uint32x4_t test_vrev64q_u32(uint32x4_t a)
     return vrev64q_u32(a);
 #endif /* POLYMORPHIC */
 }
+
+// CHECK-LABEL: @test_vrev16q_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev16q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev16q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev16q_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev16q_m_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev16q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev16q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev16q_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrev32q_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev32q_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev32q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev32q_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrev32q_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev32q_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev32q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev32q_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_m_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrev32q_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev32q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev32q_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrev64q_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrev64q_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev64q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_m_s8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrev64q_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_m_s16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vrev64q_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_m_s32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev64q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_m_u8(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrev64q_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_m_u16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_m_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vrev64q_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_m_u32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev16q_x_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev16q_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev16q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev16q_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev16q_x_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev16q_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev16q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev16q_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrev32q_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev32q_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev32q_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev32q_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrev32q_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev32q_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev32q_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev32q_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev32q_x_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrev32q_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev32q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev32q_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrev64q_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrev64q_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrev64q_x_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_x_s8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrev64q_x_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_x_s16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vrev64q_x_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_x_s32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrev64q_x_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_x_u8(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrev64q_x_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_x_u16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev64q_x_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vrev64q_x_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrev64q_x(a, p);
+#else /* POLYMORPHIC */
+    return vrev64q_x_u32(a, p);
+#endif /* POLYMORPHIC */
+}
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
index a324c36ed838..8ad5f48b5856 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
@@ -171,3 +171,388 @@ float32x4_t test_vrndnq_f32(float32x4_t a)
     return vrndnq_f32(a);
 #endif /* POLYMORPHIC */
 }
+
+// CHECK-LABEL: @test_vrndaq_m_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndaq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndaq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrndaq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndaq_m_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndaq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndaq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrndaq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_m_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndmq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndmq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrndmq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_m_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndmq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndmq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrndmq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_m_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndnq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrndnq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_m_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndnq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndnq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrndnq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_m_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndpq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndpq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrndpq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_m_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndpq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndpq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrndpq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_m_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrndq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_m_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrndq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_m_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndxq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndxq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrndxq_m_f16(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_m_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndxq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndxq_m(inactive, a, p);
+#else /* POLYMORPHIC */
+    return vrndxq_m_f32(inactive, a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndaq_x_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndaq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndaq_x(a, p);
+#else /* POLYMORPHIC */
+    return vrndaq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndaq_x_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndaq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndaq_x(a, p);
+#else /* POLYMORPHIC */
+    return vrndaq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_x_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndmq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndmq_x(a, p);
+#else /* POLYMORPHIC */
+    return vrndmq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_x_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndmq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndmq_x(a, p);
+#else /* POLYMORPHIC */
+    return vrndmq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_x_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndnq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndnq_x(a, p);
+#else /* POLYMORPHIC */
+    return vrndnq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_x_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndnq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndnq_x(a, p);
+#else /* POLYMORPHIC */
+    return vrndnq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_x_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndpq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndpq_x(a, p);
+#else /* POLYMORPHIC */
+    return vrndpq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_x_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndpq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndpq_x(a, p);
+#else /* POLYMORPHIC */
+    return vrndpq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_x_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndq_x(a, p);
+#else /* POLYMORPHIC */
+    return vrndq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_x_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndq_x(a, p);
+#else /* POLYMORPHIC */
+    return vrndq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_x_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vrndxq_x_f16(float16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndxq_x(a, p);
+#else /* POLYMORPHIC */
+    return vrndxq_x_f16(a, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_x_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef)
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vrndxq_x_f32(float32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrndxq_x(a, p);
+#else /* POLYMORPHIC */
+    return vrndxq_x_f32(a, p);
+#endif /* POLYMORPHIC */
+}
+
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index 68af4ae82579..4e939fb4bc3a 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -1159,6 +1159,11 @@ defm int_arm_mve_vcvt_fix: MVEMXPredicated<
   [llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */],
   LLVMMatchType<0>, llvm_anyvector_ty>;
 
+def int_arm_mve_vcvt_fp_int_predicated: Intrinsic<
+  [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */,
+   llvm_anyvector_ty /* predicate */, LLVMMatchType<0> /* inactive */],
+  [IntrNoMem]>;
+
 def int_arm_mve_vrintn: Intrinsic<
   [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 def int_arm_mve_vcls: Intrinsic<
@@ -1178,4 +1183,32 @@ def int_arm_mve_vqdmull_predicated: Intrinsic<
    LLVMMatchType<0>],
   [IntrNoMem]>;
 
+class MVESimpleUnaryPredicated: Intrinsic<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_arm_mve_mvn_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_abs_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_neg_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_qabs_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_qneg_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_clz_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_cls_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintz_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintm_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintp_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrinta_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintx_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintn_predicated: MVESimpleUnaryPredicated;
+
+def int_arm_mve_vrev_predicated: Intrinsic<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, llvm_i32_ty /* size to reverse */,
+    llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_arm_mve_vmovl_predicated: Intrinsic<[llvm_anyvector_ty],
+   [llvm_anyvector_ty, llvm_i32_ty /* unsigned */, llvm_i32_ty /* top half */,
+    llvm_anyvector_ty /* predicate */, LLVMMatchType<0>], [IntrNoMem]>;
+def int_arm_mve_vmovn_predicated: Intrinsic<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i32_ty /* top half */,
+    llvm_anyvector_ty /* predicate */], [IntrNoMem]>;
+
 } // end TargetPrefix
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 9a7886d6ecc5..07cea414a172 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1320,28 +1320,29 @@ let Predicates = [HasMVEInt] in {
             (v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>;
 }
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
-            (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
-  def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
-            (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
-  def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
-            (v16i8 (MVE_VREV64_8  (v16i8 MQPR:$src)))>;
+multiclass MVE_VREV_basic_patterns<int revbits, list<MVEVectorVTInfo> VTIs,
+                                   Instruction Inst> {
+  defvar unpred_op = !cast<SDNode>("ARMvrev" # revbits);
+
+  foreach VTI = VTIs in {
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$src)))>;
+    def : Pat<(VTI.Vec (int_arm_mve_vrev_predicated (VTI.Vec MQPR:$src),
+                  revbits, (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$src), ARMVCCThen,
+                  (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+  }
+}
 
-  def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
-            (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
-  def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
-            (v16i8 (MVE_VREV32_8  (v16i8 MQPR:$src)))>;
+let Predicates = [HasMVEInt] in {
+  defm: MVE_VREV_basic_patterns<64, [MVE_v4i32, MVE_v4f32], MVE_VREV64_32>;
+  defm: MVE_VREV_basic_patterns<64, [MVE_v8i16, MVE_v8f16], MVE_VREV64_16>;
+  defm: MVE_VREV_basic_patterns<64, [MVE_v16i8           ], MVE_VREV64_8>;
 
-  def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
-            (v16i8 (MVE_VREV16_8  (v16i8 MQPR:$src)))>;
+  defm: MVE_VREV_basic_patterns<32, [MVE_v8i16, MVE_v8f16], MVE_VREV32_16>;
+  defm: MVE_VREV_basic_patterns<32, [MVE_v16i8           ], MVE_VREV32_8>;
 
-  def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))),
-            (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
-  def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
-            (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
-  def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
-            (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
+  defm: MVE_VREV_basic_patterns<16, [MVE_v16i8           ], MVE_VREV16_8>;
 }
 
 def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
@@ -1356,14 +1357,14 @@ def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
 }
 
 let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (vnotq  (v16i8 MQPR:$val1))),
-            (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>;
-  def : Pat<(v8i16 (vnotq  (v8i16 MQPR:$val1))),
-            (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
-  def : Pat<(v4i32 (vnotq  (v4i32 MQPR:$val1))),
-            (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
-  def : Pat<(v2i64 (vnotq  (v2i64 MQPR:$val1))),
-            (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
+  foreach VTI = [ MVE_v16i8, MVE_v8i16, MVE_v4i32, MVE_v2i64 ] in {
+    def : Pat<(VTI.Vec (vnotq    (VTI.Vec MQPR:$val1))),
+              (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1)))>;
+    def : Pat<(VTI.Vec (int_arm_mve_mvn_predicated (VTI.Vec MQPR:$val1),
+                       (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1), ARMVCCThen,
+                       (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+  }
 }
 
 class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
@@ -2175,39 +2176,43 @@ class MVE_VCLSCLZ<string iname, string suffix, bits<2> size,
   let validForTailPredication = 1;
 }
 
-def MVE_VCLSs8  : MVE_VCLSCLZ<"vcls", "s8",  0b00, 0b0>;
-def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>;
-def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>;
+multiclass MVE_VCLSCLZ_p<string opname, bit opcode, MVEVectorVTInfo VTI,
+                         SDNode unpred_op> {
+  def "": MVE_VCLSCLZ<"v"#opname, VTI.Suffix, VTI.Size, opcode>;
 
-def MVE_VCLZs8  : MVE_VCLSCLZ<"vclz", "i8",  0b00, 0b1>;
-def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
-def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
+  defvar Inst     = !cast<Instruction>(NAME);
+  defvar pred_int = !cast<Intrinsic>("int_arm_mve_"#opname#"_predicated");
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 ( ctlz (v16i8 MQPR:$val1))),
-            (v16i8 ( MVE_VCLZs8 (v16i8 MQPR:$val1)))>;
-  def : Pat<(v4i32 ( ctlz (v4i32 MQPR:$val1))),
-            (v4i32 ( MVE_VCLZs32 (v4i32 MQPR:$val1)))>;
-  def : Pat<(v8i16 ( ctlz (v8i16 MQPR:$val1))),
-            (v8i16 ( MVE_VCLZs16 (v8i16 MQPR:$val1)))>;
-
-  def : Pat<(v16i8 ( int_arm_mve_vcls (v16i8 MQPR:$val1))),
-            (v16i8 ( MVE_VCLSs8 (v16i8 MQPR:$val1)))>;
-  def : Pat<(v4i32 ( int_arm_mve_vcls (v4i32 MQPR:$val1))),
-            (v4i32 ( MVE_VCLSs32 (v4i32 MQPR:$val1)))>;
-  def : Pat<(v8i16 ( int_arm_mve_vcls (v8i16 MQPR:$val1))),
-            (v8i16 ( MVE_VCLSs16 (v8i16 MQPR:$val1)))>;
+  let Predicates = [HasMVEInt] in {
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
+                                 (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
+                             (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+  }
 }
 
+defm MVE_VCLSs8  : MVE_VCLSCLZ_p<"cls", 0, MVE_v16s8, int_arm_mve_vcls>;
+defm MVE_VCLSs16 : MVE_VCLSCLZ_p<"cls", 0, MVE_v8s16, int_arm_mve_vcls>;
+defm MVE_VCLSs32 : MVE_VCLSCLZ_p<"cls", 0, MVE_v4s32, int_arm_mve_vcls>;
+
+defm MVE_VCLZs8  : MVE_VCLSCLZ_p<"clz", 1, MVE_v16i8, ctlz>;
+defm MVE_VCLZs16 : MVE_VCLSCLZ_p<"clz", 1, MVE_v8i16, ctlz>;
+defm MVE_VCLZs32 : MVE_VCLSCLZ_p<"clz", 1, MVE_v4i32, ctlz>;
+
 class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
-                      list<dag> pattern=[]>
+                      bit saturate, list<dag> pattern=[]>
   : MVEIntSingleSrc<iname, suffix, size, pattern> {
 
   let Inst{28} = 0b1;
   let Inst{25-23} = 0b111;
   let Inst{21-20} = 0b11;
-  let Inst{17-16} = 0b01;
-  let Inst{12-8} = 0b00011;
+  let Inst{17} = 0b0;
+  let Inst{16} = !eq(saturate, 0);
+  let Inst{12-11} = 0b00;
+  let Inst{10} = saturate;
+  let Inst{9-8} = 0b11;
   let Inst{7} = negate;
   let Inst{6} = 0b1;
   let Inst{4} = 0b0;
@@ -2215,61 +2220,40 @@ class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
   let validForTailPredication = 1;
 }
 
-def MVE_VABSs8  : MVE_VABSNEG_int<"vabs", "s8",  0b00, 0b0>;
-def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
-def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
-
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
-            (v16i8 (MVE_VABSs8 $v))>;
-  def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
-            (v8i16 (MVE_VABSs16 $v))>;
-  def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
-            (v4i32 (MVE_VABSs32 $v))>;
-}
+multiclass MVE_VABSNEG_int_m<string iname, bit negate, bit saturate,
+                             SDNode unpred_op, Intrinsic pred_int,
+                             MVEVectorVTInfo VTI> {
+  def "" : MVE_VABSNEG_int<iname, VTI.Suffix, VTI.Size, negate, saturate>;
+  defvar Inst = !cast<Instruction>(NAME);
 
-def MVE_VNEGs8  : MVE_VABSNEG_int<"vneg", "s8",  0b00, 0b1>;
-def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
-def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
+  let Predicates = [HasMVEInt] in {
+    // VQABS and VQNEG have more difficult isel patterns defined elsewhere
+    if !eq(saturate, 0) then {
+      def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
+    }
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
-            (v16i8 (MVE_VNEGs8 $v))>;
-  def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
-            (v8i16 (MVE_VNEGs16 $v))>;
-  def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
-            (v4i32 (MVE_VNEGs32 $v))>;
+    def : Pat<(VTI.Vec (pred_int  (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
+                                  (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+  }
 }
 
-class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
-                   bit negate, list<dag> pattern=[]>
-  : MVEIntSingleSrc<iname, suffix, size, pattern> {
-
-  let Inst{28} = 0b1;
-  let Inst{25-23} = 0b111;
-  let Inst{21-20} = 0b11;
-  let Inst{17-16} = 0b00;
-  let Inst{12-8} = 0b00111;
-  let Inst{7} = negate;
-  let Inst{6} = 0b1;
-  let Inst{4} = 0b0;
-  let Inst{0} = 0b0;
-  let validForTailPredication = 1;
+foreach VTI = [ MVE_v16s8, MVE_v8s16, MVE_v4s32 ] in {
+  defm "MVE_VABS" # VTI.Suffix : MVE_VABSNEG_int_m<
+     "vabs",  0, 0, abs,   int_arm_mve_abs_predicated,  VTI>;
+  defm "MVE_VQABS" # VTI.Suffix : MVE_VABSNEG_int_m<
+     "vqabs", 0, 1, ?,     int_arm_mve_qabs_predicated, VTI>;
+  defm "MVE_VNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
+     "vneg",  1, 0, vnegq, int_arm_mve_neg_predicated,  VTI>;
+  defm "MVE_VQNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
+     "vqneg", 1, 1, ?,     int_arm_mve_qneg_predicated, VTI>;
 }
 
-def MVE_VQABSs8  : MVE_VQABSNEG<"vqabs", "s8",  0b00, 0b0>;
-def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>;
-def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>;
-
-def MVE_VQNEGs8  : MVE_VQABSNEG<"vqneg", "s8",  0b00, 0b1>;
-def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
-def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
-
 // int_min/int_max: vector containing INT_MIN/INT_MAX VTI.Size times
 // zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
 multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
-                         dag zero_vec,  MVE_VQABSNEG vqabs_instruction,
-                         MVE_VQABSNEG vqneg_instruction> {
+                         dag zero_vec,  MVE_VABSNEG_int vqabs_instruction,
+                         MVE_VABSNEG_int vqneg_instruction> {
   let Predicates = [HasMVEInt] in {
     // The below tree can be replaced by a vqabs instruction, as it represents
     // the following vectorized expression (r being the value in $reg):
@@ -2470,7 +2454,7 @@ class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
   let Inst{3-1} = Qm{2-0};
 }
 
-class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
+class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U, bit top,
               list<dag> pattern=[]>
   : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
                   iname, suffix, "$Qd, $Qm", vpred_r, "",
@@ -2480,25 +2464,35 @@ class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
   let Inst{21} = 0b1;
   let Inst{20-19} = sz{1-0};
   let Inst{18-16} = 0b000;
+  let Inst{12} = top;
   let Inst{11-6} = 0b111101;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
 }
 
-multiclass MVE_VMOVL_shift_half<string iname, string suffix, bits<2> sz, bit U,
-                                list<dag> pattern=[]> {
-  def bh : MVE_VMOVL<!strconcat(iname, "b"), suffix, sz, U, pattern> {
-    let Inst{12} = 0b0;
-  }
-  def th : MVE_VMOVL<!strconcat(iname, "t"), suffix, sz, U, pattern> {
-    let Inst{12} = 0b1;
-  }
+multiclass MVE_VMOVL_m<bit top, string chr, MVEVectorVTInfo OutVTI,
+                       MVEVectorVTInfo InVTI> {
+  def "": MVE_VMOVL<"vmovl" # chr, InVTI.Suffix, OutVTI.Size,
+                    InVTI.Unsigned, top>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  def : Pat<(OutVTI.Vec (int_arm_mve_vmovl_predicated (InVTI.Vec MQPR:$src),
+                            (i32 InVTI.Unsigned), (i32 top),
+                            (OutVTI.Pred VCCR:$pred),
+                            (OutVTI.Vec MQPR:$inactive))),
+            (OutVTI.Vec (Inst (InVTI.Vec MQPR:$src), ARMVCCThen,
+                            (OutVTI.Pred VCCR:$pred),
+                            (OutVTI.Vec MQPR:$inactive)))>;
 }
 
-defm MVE_VMOVLs8 : MVE_VMOVL_shift_half<"vmovl", "s8", 0b01, 0b0>;
-defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>;
-defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
-defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
+defm MVE_VMOVLs8bh  : MVE_VMOVL_m<0, "b", MVE_v8s16, MVE_v16s8>;
+defm MVE_VMOVLs8th  : MVE_VMOVL_m<1, "t", MVE_v8s16, MVE_v16s8>;
+defm MVE_VMOVLu8bh  : MVE_VMOVL_m<0, "b", MVE_v8u16, MVE_v16u8>;
+defm MVE_VMOVLu8th  : MVE_VMOVL_m<1, "t", MVE_v8u16, MVE_v16u8>;
+defm MVE_VMOVLs16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8s16>;
+defm MVE_VMOVLs16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8s16>;
+defm MVE_VMOVLu16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8u16>;
+defm MVE_VMOVLu16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8u16>;
 
 let Predicates = [HasMVEInt] in {
   def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
@@ -3277,45 +3271,34 @@ class MVE_VRINT<string rmode, bits<3> op, string suffix, bits<2> size,
 
 }
 
-multiclass MVE_VRINT_ops<string suffix, bits<2> size, list<dag> pattern=[]> {
-  def N : MVE_VRINT<"n", 0b000, suffix, size, pattern>;
-  def X : MVE_VRINT<"x", 0b001, suffix, size, pattern>;
-  def A : MVE_VRINT<"a", 0b010, suffix, size, pattern>;
-  def Z : MVE_VRINT<"z", 0b011, suffix, size, pattern>;
-  def M : MVE_VRINT<"m", 0b101, suffix, size, pattern>;
-  def P : MVE_VRINT<"p", 0b111, suffix, size, pattern>;
-}
+multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
+                       SDNode unpred_op> {
+  def "": MVE_VRINT<suffix, opcode, VTI.Suffix, VTI.Size>;
+  defvar Inst = !cast<Instruction>(NAME);
+  defvar pred_int = !cast<Intrinsic>("int_arm_mve_vrint"#suffix#"_predicated");
 
-defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>;
-defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>;
+  let Predicates = [HasMVEFloat] in {
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
+                                 (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
+                             (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+  }
+}
 
-let Predicates = [HasMVEFloat] in {
-  def : Pat<(v4f32 (frint (v4f32 MQPR:$val1))),
-            (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$val1)))>;
-  def : Pat<(v8f16 (frint (v8f16 MQPR:$val1))),
-            (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$val1)))>;
-  def : Pat<(v4f32 (fround (v4f32 MQPR:$val1))),
-            (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$val1)))>;
-  def : Pat<(v8f16 (fround (v8f16 MQPR:$val1))),
-            (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$val1)))>;
-  def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$val1))),
-            (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$val1)))>;
-  def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$val1))),
-            (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$val1)))>;
-  def : Pat<(v4f32 (ffloor (v4f32 MQPR:$val1))),
-            (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$val1)))>;
-  def : Pat<(v8f16 (ffloor (v8f16 MQPR:$val1))),
-            (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$val1)))>;
-  def : Pat<(v4f32 (fceil (v4f32 MQPR:$val1))),
-            (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>;
-  def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))),
-            (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>;
-  def : Pat<(v4f32 (int_arm_mve_vrintn (v4f32 MQPR:$val1))),
-            (v4f32 (MVE_VRINTf32N (v4f32 MQPR:$val1)))>;
-  def : Pat<(v8f16 (int_arm_mve_vrintn (v8f16 MQPR:$val1))),
-            (v8f16 (MVE_VRINTf16N (v8f16 MQPR:$val1)))>;
+multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {
+  defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>;
+  defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
+  defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
+  defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;
+  defm M : MVE_VRINT_m<VTI, "m", 0b101, ffloor>;
+  defm P : MVE_VRINT_m<VTI, "p", 0b111, fceil>;
 }
 
+defm MVE_VRINTf16 : MVE_VRINT_ops<MVE_v8f16>;
+defm MVE_VRINTf32 : MVE_VRINT_ops<MVE_v4f32>;
+
 class MVEFloatArithNeon<string iname, string suffix, bit size,
                            dag oops, dag iops, string ops,
                            vpred_ops vpred, string cstr, list<dag> pattern=[]>
@@ -3692,7 +3675,7 @@ defm MVE_VCVTu16f16 : MVE_VCVT_fp_int_anpm_multi<"u16.f16", 0b01, 0b1>;
 defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_multi<"s32.f32", 0b10, 0b0>;
 defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_multi<"u32.f32", 0b10, 0b1>;
 
-class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
+class MVE_VCVT_fp_int<string suffix, bits<2> size, bit toint, bit unsigned,
                       list<dag> pattern=[]>
   : MVE_float<"vcvt", suffix, (outs MQPR:$Qd),
               (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
@@ -3706,41 +3689,43 @@ class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
   let Inst{17-16} = 0b11;
   let Inst{15-13} = Qd{2-0};
   let Inst{12-9} = 0b0011;
-  let Inst{8-7} = op;
+  let Inst{8} = toint;
+  let Inst{7} = unsigned;
   let Inst{4} = 0b0;
   let validForTailPredication = 1;
 }
 
+multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
+                             SDNode unpred_op> {
+  defvar Unsigned = !or(!eq(Dest.SuffixLetter,"u"), !eq(Src.SuffixLetter,"u"));
+  defvar ToInt = !eq(Src.SuffixLetter,"f");
+
+  def "" : MVE_VCVT_fp_int<Dest.Suffix # "." # Src.Suffix, Dest.Size,
+                           ToInt, Unsigned>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEFloat] in {
+    def : Pat<(Dest.Vec (unpred_op (Src.Vec MQPR:$src))),
+              (Dest.Vec (Inst (Src.Vec MQPR:$src)))>;
+    def : Pat<(Dest.Vec (int_arm_mve_vcvt_fp_int_predicated
+                             (Src.Vec MQPR:$src), (i32 Unsigned),
+                             (Src.Pred VCCR:$mask), (Dest.Vec MQPR:$inactive))),
+              (Dest.Vec (Inst (Src.Vec MQPR:$src), ARMVCCThen,
+                              (Src.Pred VCCR:$mask),
+                              (Dest.Vec MQPR:$inactive)))>;
+  }
+}
 // The unsuffixed VCVT for float->int implicitly rounds toward zero,
 // which I reflect here in the llvm instruction names
-def MVE_VCVTs16f16z : MVE_VCVT_fp_int<"s16.f16", 0b01, 0b10>;
-def MVE_VCVTu16f16z : MVE_VCVT_fp_int<"u16.f16", 0b01, 0b11>;
-def MVE_VCVTs32f32z : MVE_VCVT_fp_int<"s32.f32", 0b10, 0b10>;
-def MVE_VCVTu32f32z : MVE_VCVT_fp_int<"u32.f32", 0b10, 0b11>;
+defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m<MVE_v8s16, MVE_v8f16, fp_to_sint>;
+defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m<MVE_v8u16, MVE_v8f16, fp_to_uint>;
+defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m<MVE_v4s32, MVE_v4f32, fp_to_sint>;
+defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint>;
 // Whereas VCVT for int->float rounds to nearest
-def MVE_VCVTf16s16n : MVE_VCVT_fp_int<"f16.s16", 0b01, 0b00>;
-def MVE_VCVTf16u16n : MVE_VCVT_fp_int<"f16.u16", 0b01, 0b01>;
-def MVE_VCVTf32s32n : MVE_VCVT_fp_int<"f32.s32", 0b10, 0b00>;
-def MVE_VCVTf32u32n : MVE_VCVT_fp_int<"f32.u32", 0b10, 0b01>;
-
-let Predicates = [HasMVEFloat] in {
-  def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))),
-            (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>;
-  def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))),
-            (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>;
-  def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))),
-            (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>;
-  def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))),
-            (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>;
-  def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))),
-            (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>;
-  def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))),
-            (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>;
-  def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))),
-            (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>;
-  def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))),
-            (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>;
-}
+defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8s16, sint_to_fp>;
+defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp>;
+defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp>;
+defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp>;
 
 class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
                    list<dag> pattern=[]>
@@ -3761,26 +3746,29 @@ class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
   let validForTailPredication = 1;
 }
 
-def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>;
-def MVE_VABSf32 : MVE_VABSNEG_fp<"vabs", "f32", 0b10, 0b0>;
-
-let Predicates = [HasMVEFloat] in {
-  def : Pat<(v8f16 (fabs MQPR:$src)),
-            (MVE_VABSf16 MQPR:$src)>;
-  def : Pat<(v4f32 (fabs MQPR:$src)),
-            (MVE_VABSf32 MQPR:$src)>;
-}
+multiclass MVE_VABSNEG_fp_m<string iname, SDNode unpred_op, Intrinsic pred_int,
+                            MVEVectorVTInfo VTI, bit opcode> {
+  def "" : MVE_VABSNEG_fp<iname, VTI.Suffix, VTI.Size, opcode>;
+  defvar Inst = !cast<Instruction>(NAME);
 
-def MVE_VNEGf16 : MVE_VABSNEG_fp<"vneg", "f16", 0b01, 0b1>;
-def MVE_VNEGf32 : MVE_VABSNEG_fp<"vneg", "f32", 0b10, 0b1>;
+  let Predicates = [HasMVEInt] in {
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
 
-let Predicates = [HasMVEFloat] in {
-  def : Pat<(v8f16 (fneg MQPR:$src)),
-            (MVE_VNEGf16 MQPR:$src)>;
-  def : Pat<(v4f32 (fneg MQPR:$src)),
-            (MVE_VNEGf32 MQPR:$src)>;
+    def : Pat<(VTI.Vec (pred_int  (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
+                                  (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+  }
 }
 
+defm MVE_VABSf16 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated,
+                                    MVE_v8f16, 0>;
+defm MVE_VABSf32 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated,
+                                    MVE_v4f32, 0>;
+defm MVE_VNEGf16 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
+                                    MVE_v8f16, 1>;
+defm MVE_VNEGf32 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
+                                    MVE_v4f32, 1>;
+
 class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
                      list<dag> pattern=[]>
   : MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
@@ -4427,23 +4415,42 @@ defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>;
 defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>;
 
 def MVEvmovn       : SDNode<"ARMISD::VMOVN", SDTARMVEXT>;
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
-            (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
-  def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
-            (v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
-  def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))),
-            (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
-  def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))),
-            (v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
-
-  def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qm),
-                             (v8i16 (ARMvrev32 MQPR:$Qd_src)), (i32 1))),
-            (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
-  def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qm),
-                             (v16i8 (ARMvrev16 MQPR:$Qd_src)), (i32 1))),
-            (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
-}
+
+multiclass MVE_VMOVN_p<Instruction Inst, bit top,
+                       MVEVectorVTInfo VTI, MVEVectorVTInfo InVTI> {
+  // Match the most obvious MVEvmovn(a,b,t), which overwrites the odd or even
+  // lanes of a (depending on t) with the even lanes of b.
+  def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qd_src),
+                               (VTI.Vec MQPR:$Qm), (i32 top))),
+            (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
+
+  if !eq(top, 0) then {
+    // If we see MVEvmovn(a,ARMvrev(b),1), that wants to overwrite the odd
+    // lanes of a with the odd lanes of b. In other words, the lanes we're
+    // _keeping_ from a are the even ones. So we can flip it round and say that
+    // this is the same as overwriting the even lanes of b with the even lanes
+    // of a, i.e. it's a VMOVNB with the operands reversed.
+    defvar vrev = !cast<SDNode>("ARMvrev" # InVTI.LaneBits);
+    def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qm),
+                                 (VTI.Vec (vrev MQPR:$Qd_src)), (i32 1))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
+  }
+
+  // Match the IR intrinsic for a predicated VMOVN. This regards the Qm input
+  // as having wider lanes that we're narrowing, instead of already-narrow
+  // lanes that we're taking every other one of.
+  def : Pat<(VTI.Vec (int_arm_mve_vmovn_predicated (VTI.Vec MQPR:$Qd_src),
+                                  (InVTI.Vec MQPR:$Qm), (i32 top),
+                                  (InVTI.Pred VCCR:$pred))),
+            (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+                              (InVTI.Vec MQPR:$Qm),
+                              ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
+}
+
+defm : MVE_VMOVN_p<MVE_VMOVNi32bh, 0, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VMOVN_p<MVE_VMOVNi32th, 1, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VMOVN_p<MVE_VMOVNi16bh, 0, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VMOVN_p<MVE_VMOVNi16th, 1, MVE_v16i8, MVE_v8i16>;
 
 
 class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/absneg-predicated.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/absneg-predicated.ll
new file mode 100644
index 000000000000..ceac5e7e7e93
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/absneg-predicated.ll
@@ -0,0 +1,335 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmvnq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmvnt q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmvnt q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmvnt q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmvnq_m_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_u8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmvnt q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmvnt q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmvnt q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vnegq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vnegt.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vnegq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vnegt.f32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vnegq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vnegt.s8 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vnegq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vnegt.s16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vnegq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vnegq_m_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vnegt.s32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vabsq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vabst.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vabsq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vabst.f32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vabsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vabst.s8 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vabsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vabst.s16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vabsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vabsq_m_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vabst.s32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqnegq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqnegq_m_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqnegt.s8 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqnegq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqnegq_m_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqnegt.s16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqnegq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqnegq_m_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqnegt.s32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqabsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqabsq_m_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqabst.s8 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqabsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqabsq_m_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqabst.s16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqabsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqabsq_m_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqabst.s32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+
+declare <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vclzcls-predicated.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vclzcls-predicated.ll
new file mode 100644
index 000000000000..1f0f588d8538
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vclzcls-predicated.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vclsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclsq_m_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vclst.s8 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vclsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclsq_m_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vclst.s16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vclsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclsq_m_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vclst.s32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vclzq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vclzt.i8 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vclzq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vclzt.i16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vclzq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vclzt.i32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vclzq_m_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_u8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vclzt.i8 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vclzq_m_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vclzt.i16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vclzq_m_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vclzq_m_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vclzt.i32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
+declare <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt-fp-int.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt-fp-int.ll
new file mode 100644
index 000000000000..d3373b3d79c3
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt-fp-int.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_f16_s16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_f16_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vcvtt.f16.s16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> %a, i32 0, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_f16_u16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_f16_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vcvtt.f16.u16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> %a, i32 1, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_f32_s32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_f32_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vcvtt.f32.s32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_f32_u32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_f32_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vcvtt.f32.u32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> %a, i32 1, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_s16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_s16_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vcvtt.s16.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> %a, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_s32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_s32_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vcvtt.s32.f32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> %a, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_u16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_u16_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vcvtt.u16.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> %a, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_u32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vcvtq_m_u32_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vcvtt.u32.f32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> %a, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+
+declare <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x float>)
+declare <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half>, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float>, i32, <4 x i1>, <4 x i32>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll
index c9ce38f9d612..fd33dddc685e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll
@@ -145,3 +145,200 @@ entry:
   %1 = zext <4 x i16> %0 to <4 x i32>
   ret <4 x i32> %1
 }
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovlbq_m_s8:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovlbt.s8 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovlbq_m_s8:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.16 q2, q0
+; BE-NEXT:    vrev64.8 q0, q1
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovlbt.s8 q2, q0
+; BE-NEXT:    vrev64.16 q0, q2
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovlbq_m_s16:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovlbt.s16 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovlbq_m_s16:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.32 q2, q0
+; BE-NEXT:    vrev64.16 q0, q1
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovlbt.s16 q2, q0
+; BE-NEXT:    vrev64.32 q0, q2
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovlbq_m_u8:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovlbt.u8 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovlbq_m_u8:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.16 q2, q0
+; BE-NEXT:    vrev64.8 q0, q1
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovlbt.u8 q2, q0
+; BE-NEXT:    vrev64.16 q0, q2
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovlbq_m_u16:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovlbt.u16 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovlbq_m_u16:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.32 q2, q0
+; BE-NEXT:    vrev64.16 q0, q1
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovlbt.u16 q2, q0
+; BE-NEXT:    vrev64.32 q0, q2
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovltq_m_s8:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovltt.s8 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovltq_m_s8:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.16 q2, q0
+; BE-NEXT:    vrev64.8 q0, q1
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovltt.s8 q2, q0
+; BE-NEXT:    vrev64.16 q0, q2
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovltq_m_s16:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovltt.s16 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovltq_m_s16:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.32 q2, q0
+; BE-NEXT:    vrev64.16 q0, q1
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovltt.s16 q2, q0
+; BE-NEXT:    vrev64.32 q0, q2
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovltq_m_u8:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovltt.u8 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovltq_m_u8:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.16 q2, q0
+; BE-NEXT:    vrev64.8 q0, q1
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovltt.u8 q2, q0
+; BE-NEXT:    vrev64.16 q0, q2
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; LE-LABEL: test_vmovltq_m_u16:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovltt.u16 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovltq_m_u16:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.32 q2, q0
+; BE-NEXT:    vrev64.16 q0, q1
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovltt.u16 q2, q0
+; BE-NEXT:    vrev64.32 q0, q2
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8>, i32, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16>, i32, i32, <4 x i1>, <4 x i32>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll
index f16e83a45b6f..391b163f718a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll
@@ -166,5 +166,201 @@ entry:
   ret <8 x i16> %2
 }
 
+define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovnbq_m_s16:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovnbt.i16 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovnbq_m_s16:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.16 q2, q1
+; BE-NEXT:    vrev64.8 q1, q0
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovnbt.i16 q1, q2
+; BE-NEXT:    vrev64.8 q0, q1
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, <8 x i1> %1)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovnbq_m_s32:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovnbt.i32 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovnbq_m_s32:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.32 q2, q1
+; BE-NEXT:    vrev64.16 q1, q0
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovnbt.i32 q1, q2
+; BE-NEXT:    vrev64.16 q0, q1
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovnbq_m_u16:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovnbt.i16 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovnbq_m_u16:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.16 q2, q1
+; BE-NEXT:    vrev64.8 q1, q0
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovnbt.i16 q1, q2
+; BE-NEXT:    vrev64.8 q0, q1
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, <8 x i1> %1)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovnbq_m_u32:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovnbt.i32 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovnbq_m_u32:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.32 q2, q1
+; BE-NEXT:    vrev64.16 q1, q0
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovnbt.i32 q1, q2
+; BE-NEXT:    vrev64.16 q0, q1
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovntq_m_s16:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovntt.i16 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovntq_m_s16:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.16 q2, q1
+; BE-NEXT:    vrev64.8 q1, q0
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovntt.i16 q1, q2
+; BE-NEXT:    vrev64.8 q0, q1
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, <8 x i1> %1)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovntq_m_s32:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovntt.i32 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovntq_m_s32:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.32 q2, q1
+; BE-NEXT:    vrev64.16 q1, q0
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovntt.i32 q1, q2
+; BE-NEXT:    vrev64.16 q0, q1
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, <4 x i1> %1)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovntq_m_u16:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovntt.i16 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovntq_m_u16:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.16 q2, q1
+; BE-NEXT:    vrev64.8 q1, q0
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovntt.i16 q1, q2
+; BE-NEXT:    vrev64.8 q0, q1
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, <8 x i1> %1)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
+; LE-LABEL: test_vmovntq_m_u32:
+; LE:       @ %bb.0: @ %entry
+; LE-NEXT:    vmsr p0, r0
+; LE-NEXT:    vpst
+; LE-NEXT:    vmovntt.i32 q0, q1
+; LE-NEXT:    bx lr
+;
+; BE-LABEL: test_vmovntq_m_u32:
+; BE:       @ %bb.0: @ %entry
+; BE-NEXT:    vrev64.32 q2, q1
+; BE-NEXT:    vrev64.16 q1, q0
+; BE-NEXT:    vmsr p0, r0
+; BE-NEXT:    vpst
+; BE-NEXT:    vmovntt.i32 q1, q2
+; BE-NEXT:    vrev64.16 q0, q1
+; BE-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, <4 x i1> %1)
+  ret <8 x i16> %2
+}
+
 declare <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8>)
 declare <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16>)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8>, <8 x i16>, i32, <8 x i1>)
+declare <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16>, <4 x i32>, i32, <4 x i1>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrev.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrev.ll
new file mode 100644
index 000000000000..841291e4a70e
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrev.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrev16q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev16q_m_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrev16t.8 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 16, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrev32q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev32q_m_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrev32t.8 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 32, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrev32q_m_i16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev32q_m_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrev32t.16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> %a, i32 32, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrev32q_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev32q_m_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrev32t.16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> %a, i32 32, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrev64q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrev64t.8 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 64, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrev64q_m_i16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrev64t.16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> %a, i32 64, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrev64q_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrev64t.16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> %a, i32 64, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrev64q_m_i32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrev64t.32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> %a, i32 64, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrev64q_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrev64q_m_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrev64t.32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> %a, i32 64, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+
+declare <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
+declare <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half>, i32, <8 x i1>, <8 x half>)
+declare <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)
+declare <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float>, i32, <4 x i1>, <4 x float>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrint-predicated.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrint-predicated.ll
new file mode 100644
index 000000000000..c24cc87447e8
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrint-predicated.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndaq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndaq_m_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrintat.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndaq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndaq_m_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrintat.f32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndmq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndmq_m_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrintmt.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndmq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndmq_m_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrintmt.f32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndnq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndnq_m_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrintnt.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndnq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndnq_m_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrintnt.f32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndpq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndpq_m_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrintpt.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndpq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndpq_m_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrintpt.f32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndq_m_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrintzt.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndq_m_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrintzt.f32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndxq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndxq_m_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrintxt.f16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive)
+  ret <8 x half> %2
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndxq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrndxq_m_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrintxt.f32 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive)
+  ret <4 x float> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
+declare <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
-- 
2.34.1