From 9eb3cc10b2c6f78ccf033cb264113fc904651cd0 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Wed, 26 Feb 2020 14:10:43 +0000 Subject: [PATCH] [ARM,MVE] Add predicated intrinsics for many unary functions. Summary: This commit adds the predicated MVE intrinsics for the same set of unary operations that I added in their unpredicated forms in * D74333 (vrint) * D74334 (vrev) * D74335 (vclz, vcls) * D74336 (vmovl) * D74337 (vmovn) but since the predicated versions are a lot more similar to each other, I've kept them all together in a single big patch. Everything here is done in the standard way we've been doing other predicated operations: an IR intrinsic called `@llvm.arm.mve.foo.predicated` and some isel rules that match that alongside whatever they accept for the unpredicated version of the same instruction. In order to write the isel rules conveniently, I've refactored the existing isel rules for the affected instructions into multiclasses parametrised by a vector-type class, in the usual way. All those refactorings are intended to leave the existing isel rules unchanged: the only difference should be that new ones for the predicated intrinsics are introduced. The only tiny infrastructure change I needed in this commit was to change the implementation of `IntrinsicMX` in `arm_mve_defs.td` so that the records it defines are anonymous rather than named (and use `NameOverride` to set the output intrinsic name), which allows me to call it twice in two multiclasses with the same `NAME` without a tablegen-time error. 
Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: MarkMurrayARM Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D75165 --- clang/include/clang/Basic/arm_mve.td | 139 ++-- clang/include/clang/Basic/arm_mve_defs.td | 8 +- .../test/CodeGen/arm-mve-intrinsics/absneg.c | 615 ++++++++++++++++++ clang/test/CodeGen/arm-mve-intrinsics/vclz.c | 287 ++++++++ clang/test/CodeGen/arm-mve-intrinsics/vcvt.c | 240 +++++++ clang/test/CodeGen/arm-mve-intrinsics/vmovl.c | 256 ++++++++ clang/test/CodeGen/arm-mve-intrinsics/vmovn.c | 184 ++++++ clang/test/CodeGen/arm-mve-intrinsics/vrev.c | 480 ++++++++++++++ clang/test/CodeGen/arm-mve-intrinsics/vrnd.c | 385 +++++++++++ llvm/include/llvm/IR/IntrinsicsARM.td | 33 + llvm/lib/Target/ARM/ARMInstrMVE.td | 419 ++++++------ .../mve-intrinsics/absneg-predicated.ll | 335 ++++++++++ .../mve-intrinsics/vclzcls-predicated.ll | 138 ++++ .../Thumb2/mve-intrinsics/vcvt-fp-int.ll | 122 ++++ .../CodeGen/Thumb2/mve-intrinsics/vmovl.ll | 197 ++++++ .../CodeGen/Thumb2/mve-intrinsics/vmovn.ll | 196 ++++++ .../CodeGen/Thumb2/mve-intrinsics/vrev.ll | 138 ++++ .../Thumb2/mve-intrinsics/vrint-predicated.ll | 185 ++++++ 18 files changed, 4107 insertions(+), 250 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/absneg-predicated.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vclzcls-predicated.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt-fp-int.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vrev.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vrint-predicated.ll diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 7150852d7004..efc6be1158b8 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -237,11 +237,18 @@ let params = T.Unsigned in { let params = T.Int in { def vmvnq: Intrinsic; + defm 
vmvnq: IntrinsicMX $a, $pred, $inactive)>; def vclzq: Intrinsic $a, (i1 0))>; + defm vclzq: IntrinsicMX $a, $pred, $inactive)>; } let params = T.Signed in { def vclsq: Intrinsic $a)>; + defm vclsq: IntrinsicMX $a, $pred, $inactive)>; + def vnegq: Intrinsic; def vabsq: Intrinsic; + + foreach name = ["qneg", "qabs"] in { + defm v#name#q: IntrinsicMX $a, $pred, $inactive), + 0 /* no _x variant for saturating intrinsics */>; + } +} +let params = !listconcat(T.Signed, T.Float) in { + foreach name = ["neg", "abs"] in { + defm v#name#q: IntrinsicMX $a, $pred, $inactive)>; + } } let params = T.Float in { def vnegq_f: Intrinsic, @@ -440,49 +459,77 @@ multiclass float_int_conversions; defvar IVector = VecOf; - let params = [IScalar], pnt = PNT_2Type in - def : Intrinsic, - NameOverride<"vcvtq_" # FScalar>; - let params = [FScalar], pnt = PNT_None in - def : Intrinsic, - NameOverride<"vcvtq_" # IScalar>; + let params = [IScalar] in { + let pnt = PNT_2Type in { + def : Intrinsic, + NameOverride<"vcvtq_" # FScalar>; + } + defm vcvtq: IntrinsicMX + $a, (unsignedflag IScalar), $pred, $inactive), + 1, "_" # FScalar, PNT_2Type, PNT_2Type>; + } + let params = [FScalar] in { + let pnt = PNT_None in { + def : Intrinsic, + NameOverride<"vcvtq_" # IScalar>; + } + defm vcvtq: IntrinsicMX + $a, (unsignedflag IScalar), $pred, $inactive), + 1, "_" # IScalar, PNT_2Type, PNT_None>; + } } -defm : float_int_conversions; -defm : float_int_conversions; -defm : float_int_conversions; -defm : float_int_conversions; +defm "" : float_int_conversions; +defm "" : float_int_conversions; +defm "" : float_int_conversions; +defm "" : float_int_conversions; -let params = [s8, u8, s16, u16] in { - def vmovlbq: Intrinsic; - def vmovltq: Intrinsic; +multiclass vmovl { + let params = [s8, u8, s16, u16] in { + def "": Intrinsic; + defm "": IntrinsicMX + $a, (unsignedflag Scalar), top, $pred, $inactive)>; + } } -let params = [s16, u16, s32, u32] in { - def vmovnbq: Intrinsic; - def vmovntq: Intrinsic; +defm 
vmovlbq: vmovl<0>; +defm vmovltq: vmovl<1>; + +multiclass vmovn { + let params = [s16, u16, s32, u32] in { + def "": Intrinsic; + def _m: Intrinsic + $inactive, $a, top, $pred)>; + } } -let params = T.Float in { - def vrndq: Intrinsic $a)>; - def vrndmq: Intrinsic $a)>; - def vrndpq: Intrinsic $a)>; - def vrndaq: Intrinsic $a)>; - def vrndxq: Intrinsic $a)>; - def vrndnq: Intrinsic $a)>; +defm vmovntq: vmovn<1, (zip (vreinterpret $inactive, Vector), $a)>; +defm vmovnbq: vmovn<0, + (zip $a, (vreinterpret (vrev $inactive, (bitsize Scalar)), Vector))>; + +multiclass vrnd { + let params = T.Float in { + def "": Intrinsic; + defm "": IntrinsicMX + $a, $pred, $inactive)>; + } } +defm vrndq: vrnd, "z">; +defm vrndmq: vrnd, "m">; +defm vrndpq: vrnd, "p">; +defm vrndaq: vrnd, "a">; +defm vrndxq: vrnd, "x">; +defm vrndnq: vrnd, "n">; + multiclass compare_with_pred { // Make the predicated and unpredicated versions of a single comparison. @@ -1231,12 +1278,24 @@ defm vrmlsldavh : MVEBinaryVectorHoriz64R; defm vrmlsldavh : MVEBinaryVectorHoriz64R; } -let params = T.All8 in -def vrev16q : Intrinsic; -let params = !listconcat(T.All8, T.All16) in -def vrev32q : Intrinsic; -let params = T.Usual in -def vrev64q : Intrinsic; +multiclass vrev_predicated { + defm "" : IntrinsicMX + $a, revsize, $pred, $inactive)>; +} + +let params = T.All8 in { + def vrev16q : Intrinsic; + defm vrev16q: vrev_predicated<16>; +} +let params = !listconcat(T.All8, T.All16) in { + def vrev32q : Intrinsic; + defm vrev32q: vrev_predicated<32>; +} +let params = T.Usual in { + def vrev64q : Intrinsic; + defm vrev64q: vrev_predicated<64>; +} foreach desttype = T.All in { // We want a vreinterpretq between every pair of supported vector types diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 776dc9c73da4..dbcad78cce75 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -506,8 +506,8 @@ multiclass 
IntrinsicMX { + def : Intrinsic, + NameOverride { let pnt = pnt_m; } @@ -515,8 +515,8 @@ multiclass IntrinsicMX { + def : Intrinsic, + NameOverride { let pnt = pnt_x; } } diff --git a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c index db4253f3590b..94339c834809 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c @@ -164,6 +164,198 @@ uint32x4_t test_vmvnq_u32(uint32x4_t a) #endif /* POLYMORPHIC */ } +// CHECK-LABEL: @test_vmvnq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vmvnq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmvnq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmvnq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmvnq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: 
[[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmvnq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmvnq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vmvnq_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmvnq_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmvnq_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmvnq_m_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> 
[[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmvnq_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmvnq_m_u32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vmvnq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_x(a, p); +#else /* POLYMORPHIC */ + return vmvnq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmvnq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_x(a, p); +#else /* POLYMORPHIC */ + return vmvnq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmvnq_x_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_x(a, p); +#else /* POLYMORPHIC */ + return 
vmvnq_x_s32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vmvnq_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_x(a, p); +#else /* POLYMORPHIC */ + return vmvnq_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmvnq_x_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_x(a, p); +#else /* POLYMORPHIC */ + return vmvnq_x_u16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmvnq_x_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_x(a, p); +#else /* POLYMORPHIC */ + return vmvnq_x_u32(a, p); +#endif /* POLYMORPHIC */ +} + // CHECK-LABEL: @test_vnegq_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = fneg <8 x half> [[A:%.*]] @@ -335,4 +527,427 @@ int32x4_t test_vqnegq_s32(int32x4_t a) return vqnegq_s32(a); #endif /* 
POLYMORPHIC */ } +#include <arm_mve.h> + +// CHECK-LABEL: @test_vnegq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vnegq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vnegq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vnegq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vnegq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vnegq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vnegq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_m_s16( +// CHECK-NEXT: entry: 
+// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vnegq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vnegq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vnegq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vnegq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vnegq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_x(a, p); +#else /* POLYMORPHIC */ + return vnegq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// 
CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vnegq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_x(a, p); +#else /* POLYMORPHIC */ + return vnegq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vnegq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_x(a, p); +#else /* POLYMORPHIC */ + return vnegq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vnegq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_x(a, p); +#else /* POLYMORPHIC */ + return vnegq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_x_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vnegq_x_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef 
POLYMORPHIC + return vnegq_x(a, p); +#else /* POLYMORPHIC */ + return vnegq_x_s32(a, p); +#endif /* POLYMORPHIC */ +} + +#include <arm_mve.h> + +// CHECK-LABEL: @test_vabsq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vabsq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vabsq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vabsq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vabsq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vabsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return 
vabsq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vabsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vabsq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vabsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vabsq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vabsq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_x(a, p); +#else /* POLYMORPHIC */ + return vabsq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 
[[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vabsq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_x(a, p); +#else /* POLYMORPHIC */ + return vabsq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vabsq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_x(a, p); +#else /* POLYMORPHIC */ + return vabsq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vabsq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_x(a, p); +#else /* POLYMORPHIC */ + return vabsq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_x_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// 
CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vabsq_x_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_x(a, p); +#else /* POLYMORPHIC */ + return vabsq_x_s32(a, p); +#endif /* POLYMORPHIC */ +} + +#include <arm_mve.h> + +// CHECK-LABEL: @test_vqnegq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vqnegq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vqnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vqnegq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqnegq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vqnegq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vqnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vqnegq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqnegq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vqnegq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef 
POLYMORPHIC + return vqnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vqnegq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +#include <arm_mve.h> + +// CHECK-LABEL: @test_vqabsq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vqabsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vqabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vqabsq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqabsq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vqabsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vqabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vqabsq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqabsq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vqabsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vqabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return 
vqabsq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vclz.c b/clang/test/CodeGen/arm-mve-intrinsics/vclz.c index 7a2ebe0a627a..b39ac36eb340 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vclz.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vclz.c @@ -130,3 +130,290 @@ int32x4_t test_vclsq_s32(int32x4_t a) #endif /* POLYMORPHIC */ } +// CHECK-LABEL: @test_vclsq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vclsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclsq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclsq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vclsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclsq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclsq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> 
[[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vclsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclsq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vclzq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclzq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vclzq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclzq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vclzq_m_s32(int32x4_t inactive, 
int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclzq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vclzq_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclzq_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vclzq_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclzq_m_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_m_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vclzq_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_m(inactive, a, p); +#else /* 
POLYMORPHIC */ + return vclzq_m_u32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclsq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vclsq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclsq_x(a, p); +#else /* POLYMORPHIC */ + return vclsq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclsq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vclsq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclsq_x(a, p); +#else /* POLYMORPHIC */ + return vclsq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclsq_x_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vclsq_x_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclsq_x(a, p); +#else /* POLYMORPHIC */ + return vclsq_x_s32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vclzq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_x(a, p); +#else /* POLYMORPHIC */ + return vclzq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vclzq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_x(a, p); +#else /* POLYMORPHIC */ + return vclzq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_x_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vclzq_x_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_x(a, p); +#else /* POLYMORPHIC */ + return vclzq_x_s32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t 
test_vclzq_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_x(a, p); +#else /* POLYMORPHIC */ + return vclzq_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_x_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vclzq_x_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_x(a, p); +#else /* POLYMORPHIC */ + return vclzq_x_u16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_x_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vclzq_x_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_x(a, p); +#else /* POLYMORPHIC */ + return vclzq_x_u32(a, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c index 3220100d7b89..0391b77e365f 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c @@ -100,6 +100,246 @@ uint32x4_t test_vcvtq_u32_f32(float32x4_t a) return vcvtq_u32_f32(a); } +// CHECK-LABEL: @test_vcvtq_m_f16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> 
[[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vcvtq_m_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_f16_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_f16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vcvtq_m_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_f16_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_f32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vcvtq_m_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_f32_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_f32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vcvtq_m_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_f32_u32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_s16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vcvtq_m_s16_f16(int16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_s16_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_s32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vcvtq_m_s32_f32(int32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_s32_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_u16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> 
@llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vcvtq_m_u16_f16(uint16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_u16_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_u32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vcvtq_m_u32_f32(uint32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_u32_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_x_f16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vcvtq_x_f16_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_x(a, p); +#else /* POLYMORPHIC */ + return vcvtq_x_f16_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_x_f16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vcvtq_x_f16_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_x(a, p); +#else /* POLYMORPHIC */ + return vcvtq_x_f16_u16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_x_f32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vcvtq_x_f32_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_x(a, p); +#else /* POLYMORPHIC */ + return vcvtq_x_f32_s32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_x_f32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vcvtq_x_f32_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_x(a, p); +#else /* POLYMORPHIC */ + return vcvtq_x_f32_u32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_x_s16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x 
i16> [[TMP2]] +// +int16x8_t test_vcvtq_x_s16_f16(float16x8_t a, mve_pred16_t p) +{ + return vcvtq_x_s16_f16(a, p); +} + +// CHECK-LABEL: @test_vcvtq_x_s32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vcvtq_x_s32_f32(float32x4_t a, mve_pred16_t p) +{ + return vcvtq_x_s32_f32(a, p); +} + +// CHECK-LABEL: @test_vcvtq_x_u16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vcvtq_x_u16_f16(float16x8_t a, mve_pred16_t p) +{ + return vcvtq_x_u16_f16(a, p); +} + +// CHECK-LABEL: @test_vcvtq_x_u32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vcvtq_x_u32_f32(float32x4_t a, mve_pred16_t p) +{ + return vcvtq_x_u32_f32(a, p); +} + // CHECK-LABEL: @test_vcvttq_f16_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c b/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c index 
0b8ef596faed..e66e67c49976 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c @@ -124,3 +124,259 @@ uint32x4_t test_vmovltq_u16(uint16x8_t a) #endif /* POLYMORPHIC */ } +// CHECK-LABEL: @test_vmovlbq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmovlbq_m_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovlbq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmovlbq_m_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovlbq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t 
test_vmovlbq_m_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovlbq_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmovlbq_m_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovlbq_m_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmovltq_m_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovltq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t 
test_vmovltq_m_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovltq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmovltq_m_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovltq_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmovltq_m_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovltq_m_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmovlbq_x_s8(int8x16_t a, 
mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_x(a, p); +#else /* POLYMORPHIC */ + return vmovlbq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmovlbq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_x(a, p); +#else /* POLYMORPHIC */ + return vmovlbq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmovlbq_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_x(a, p); +#else /* POLYMORPHIC */ + return vmovlbq_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_x_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmovlbq_x_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_x(a, p); +#else /* POLYMORPHIC */ + return vmovlbq_x_u16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: 
@test_vmovltq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmovltq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_x(a, p); +#else /* POLYMORPHIC */ + return vmovltq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmovltq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_x(a, p); +#else /* POLYMORPHIC */ + return vmovltq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmovltq_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_x(a, p); +#else /* POLYMORPHIC */ + return vmovltq_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_x_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmovltq_x_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_x(a, p); +#else /* POLYMORPHIC */ + return vmovltq_x_u16(a, p); +#endif /* POLYMORPHIC */ +} + diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c b/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c index 5d157de0feb8..ed414b52ade8 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c @@ -197,3 +197,187 @@ uint16x8_t test_vmovntq_u32(uint16x8_t a, uint32x4_t b) return vmovntq_u32(a, b); #endif /* POLYMORPHIC */ } + +// LE-LABEL: @test_vmovnbq_m_s16( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]]) +// LE-NEXT: ret <16 x i8> [[TMP2]] +// +// BE-LABEL: @test_vmovnbq_m_s16( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]]) +// BE-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vmovnbq_m_s16(int8x16_t a, int16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovnbq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovnbq_m_s16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovnbq_m_s32( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// 
LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]]) +// LE-NEXT: ret <8 x i16> [[TMP2]] +// +// BE-LABEL: @test_vmovnbq_m_s32( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]]) +// BE-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmovnbq_m_s32(int16x8_t a, int32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovnbq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovnbq_m_s32(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovnbq_m_u16( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]]) +// LE-NEXT: ret <16 x i8> [[TMP2]] +// +// BE-LABEL: @test_vmovnbq_m_u16( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]]) +// BE-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vmovnbq_m_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovnbq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovnbq_m_u16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovnbq_m_u32( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// 
LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]]) +// LE-NEXT: ret <8 x i16> [[TMP2]] +// +// BE-LABEL: @test_vmovnbq_m_u32( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]]) +// BE-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmovnbq_m_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovnbq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovnbq_m_u32(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovntq_m_s16( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]]) +// LE-NEXT: ret <16 x i8> [[TMP2]] +// +// BE-LABEL: @test_vmovntq_m_s16( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]]) +// BE-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vmovntq_m_s16(int8x16_t a, int16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovntq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovntq_m_s16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovntq_m_s32( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// 
LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]]) +// LE-NEXT: ret <8 x i16> [[TMP2]] +// +// BE-LABEL: @test_vmovntq_m_s32( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]]) +// BE-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmovntq_m_s32(int16x8_t a, int32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovntq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovntq_m_s32(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovntq_m_u16( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]]) +// LE-NEXT: ret <16 x i8> [[TMP2]] +// +// BE-LABEL: @test_vmovntq_m_u16( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]]) +// BE-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vmovntq_m_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovntq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovntq_m_u16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovntq_m_u32( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// 
LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]]) +// LE-NEXT: ret <8 x i16> [[TMP2]] +// +// BE-LABEL: @test_vmovntq_m_u32( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]]) +// BE-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmovntq_m_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovntq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovntq_m_u32(a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrev.c b/clang/test/CodeGen/arm-mve-intrinsics/vrev.c index 384d736d2a6d..73675cc005b2 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vrev.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vrev.c @@ -213,3 +213,483 @@ uint32x4_t test_vrev64q_u32(uint32x4_t a) return vrev64q_u32(a); #endif /* POLYMORPHIC */ } + +// CHECK-LABEL: @test_vrev16q_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vrev16q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev16q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev16q_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev16q_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vrev16q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev16q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev16q_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrev32q_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev32q_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vrev32q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev32q_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// 
CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vrev32q_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev32q_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vrev32q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev32q_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vrev32q_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev32q_m_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrev64q_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrev64q_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vrev64q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> 
@llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vrev64q_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vrev64q_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vrev64q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> 
[[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vrev64q_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vrev64q_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_u32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev16q_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vrev16q_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev16q_x(a, p); +#else /* POLYMORPHIC */ + return vrev16q_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev16q_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// 
+uint8x16_t test_vrev16q_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev16q_x(a, p); +#else /* POLYMORPHIC */ + return vrev16q_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrev32q_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_x(a, p); +#else /* POLYMORPHIC */ + return vrev32q_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vrev32q_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_x(a, p); +#else /* POLYMORPHIC */ + return vrev32q_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vrev32q_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_x(a, p); +#else /* POLYMORPHIC */ + return vrev32q_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// 
CHECK-LABEL: @test_vrev32q_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vrev32q_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_x(a, p); +#else /* POLYMORPHIC */ + return vrev32q_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_x_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vrev32q_x_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_x(a, p); +#else /* POLYMORPHIC */ + return vrev32q_x_u16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrev64q_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// 
CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrev64q_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vrev64q_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vrev64q_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t 
test_vrev64q_x_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_s32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vrev64q_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vrev64q_x_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_u16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vrev64q_x_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_u32(a, p); +#endif /* POLYMORPHIC */ +} diff --git 
a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c index a324c36ed838..8ad5f48b5856 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c @@ -171,3 +171,388 @@ float32x4_t test_vrndnq_f32(float32x4_t a) return vrndnq_f32(a); #endif /* POLYMORPHIC */ } + +// CHECK-LABEL: @test_vrndaq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndaq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndaq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndaq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndaq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndaq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndaq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndaq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> 
[[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndmq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndmq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndmq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndmq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndmq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndmq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndnq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndnq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndnq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndnq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> 
[[TMP2]] +// +float32x4_t test_vrndnq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndnq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndpq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndpq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndpq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndpq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndpq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndpq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndq_m_f16(float16x8_t 
inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndxq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndxq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndxq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndxq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndxq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndxq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef 
POLYMORPHIC + return vrndxq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndxq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndaq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndaq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndaq_x(a, p); +#else /* POLYMORPHIC */ + return vrndaq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndaq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndaq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndaq_x(a, p); +#else /* POLYMORPHIC */ + return vrndaq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndmq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndmq_x(a, p); +#else /* POLYMORPHIC */ + return vrndmq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndmq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndmq_x(a, p); +#else /* POLYMORPHIC */ + return vrndmq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndnq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndnq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndnq_x(a, p); +#else /* POLYMORPHIC */ + return vrndnq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndnq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndnq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndnq_x(a, p); +#else /* POLYMORPHIC */ + return vrndnq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> 
@llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndpq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndpq_x(a, p); +#else /* POLYMORPHIC */ + return vrndpq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndpq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndpq_x(a, p); +#else /* POLYMORPHIC */ + return vrndpq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndq_x(a, p); +#else /* POLYMORPHIC */ + return vrndq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef 
POLYMORPHIC + return vrndq_x(a, p); +#else /* POLYMORPHIC */ + return vrndq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndxq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndxq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndxq_x(a, p); +#else /* POLYMORPHIC */ + return vrndxq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndxq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndxq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndxq_x(a, p); +#else /* POLYMORPHIC */ + return vrndxq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 68af4ae82579..4e939fb4bc3a 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -1159,6 +1159,11 @@ defm int_arm_mve_vcvt_fix: MVEMXPredicated< [llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */], LLVMMatchType<0>, llvm_anyvector_ty>; +def int_arm_mve_vcvt_fp_int_predicated: Intrinsic< + [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */, + llvm_anyvector_ty /* predicate */, LLVMMatchType<0> /* inactive */], + [IntrNoMem]>; + def int_arm_mve_vrintn: Intrinsic< [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; 
def int_arm_mve_vcls: Intrinsic< @@ -1178,4 +1183,32 @@ def int_arm_mve_vqdmull_predicated: Intrinsic< LLVMMatchType<0>], [IntrNoMem]>; +class MVESimpleUnaryPredicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; + +def int_arm_mve_mvn_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_abs_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_neg_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_qabs_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_qneg_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_clz_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_cls_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrintz_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrintm_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrintp_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrinta_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrintx_predicated: MVESimpleUnaryPredicated; +def int_arm_mve_vrintn_predicated: MVESimpleUnaryPredicated; + +def int_arm_mve_vrev_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_i32_ty /* size to reverse */, + llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; + +def int_arm_mve_vmovl_predicated: Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, llvm_i32_ty /* unsigned */, llvm_i32_ty /* top half */, + llvm_anyvector_ty /* predicate */, LLVMMatchType<0>], [IntrNoMem]>; +def int_arm_mve_vmovn_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i32_ty /* top half */, + llvm_anyvector_ty /* predicate */], [IntrNoMem]>; + } // end TargetPrefix diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 9a7886d6ecc5..07cea414a172 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1320,28 +1320,29 @@ let Predicates = [HasMVEInt] in { (v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>; } -let Predicates = 
[HasMVEInt] in { - def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))), - (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>; - def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))), - (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>; - def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))), - (v16i8 (MVE_VREV64_8 (v16i8 MQPR:$src)))>; +multiclass MVE_VREV_basic_patterns VTIs, + Instruction Inst> { + defvar unpred_op = !cast("ARMvrev" # revbits); + + foreach VTI = VTIs in { + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src))), + (VTI.Vec (Inst (VTI.Vec MQPR:$src)))>; + def : Pat<(VTI.Vec (int_arm_mve_vrev_predicated (VTI.Vec MQPR:$src), + revbits, (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (Inst (VTI.Vec MQPR:$src), ARMVCCThen, + (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>; + } +} - def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))), - (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>; - def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))), - (v16i8 (MVE_VREV32_8 (v16i8 MQPR:$src)))>; +let Predicates = [HasMVEInt] in { + defm: MVE_VREV_basic_patterns<64, [MVE_v4i32, MVE_v4f32], MVE_VREV64_32>; + defm: MVE_VREV_basic_patterns<64, [MVE_v8i16, MVE_v8f16], MVE_VREV64_16>; + defm: MVE_VREV_basic_patterns<64, [MVE_v16i8 ], MVE_VREV64_8>; - def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))), - (v16i8 (MVE_VREV16_8 (v16i8 MQPR:$src)))>; + defm: MVE_VREV_basic_patterns<32, [MVE_v8i16, MVE_v8f16], MVE_VREV32_16>; + defm: MVE_VREV_basic_patterns<32, [MVE_v16i8 ], MVE_VREV32_8>; - def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))), - (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>; - def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))), - (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>; - def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))), - (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>; + defm: MVE_VREV_basic_patterns<16, [MVE_v16i8 ], MVE_VREV16_8>; } def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), @@ -1356,14 +1357,14 @@ def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), } let Predicates = 
[HasMVEInt] in { - def : Pat<(v16i8 (vnotq (v16i8 MQPR:$val1))), - (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>; - def : Pat<(v8i16 (vnotq (v8i16 MQPR:$val1))), - (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>; - def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))), - (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>; - def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))), - (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>; + foreach VTI = [ MVE_v16i8, MVE_v8i16, MVE_v4i32, MVE_v2i64 ] in { + def : Pat<(VTI.Vec (vnotq (VTI.Vec MQPR:$val1))), + (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1)))>; + def : Pat<(VTI.Vec (int_arm_mve_mvn_predicated (VTI.Vec MQPR:$val1), + (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1), ARMVCCThen, + (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>; + } } class MVE_bit_ops bit_21_20, bit bit_28> @@ -2175,39 +2176,43 @@ class MVE_VCLSCLZ size, let validForTailPredication = 1; } -def MVE_VCLSs8 : MVE_VCLSCLZ<"vcls", "s8", 0b00, 0b0>; -def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>; -def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>; +multiclass MVE_VCLSCLZ_p { + def "": MVE_VCLSCLZ<"v"#opname, VTI.Suffix, VTI.Size, opcode>; -def MVE_VCLZs8 : MVE_VCLSCLZ<"vclz", "i8", 0b00, 0b1>; -def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>; -def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>; + defvar Inst = !cast(NAME); + defvar pred_int = !cast("int_arm_mve_"#opname#"_predicated"); -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 ( ctlz (v16i8 MQPR:$val1))), - (v16i8 ( MVE_VCLZs8 (v16i8 MQPR:$val1)))>; - def : Pat<(v4i32 ( ctlz (v4i32 MQPR:$val1))), - (v4i32 ( MVE_VCLZs32 (v4i32 MQPR:$val1)))>; - def : Pat<(v8i16 ( ctlz (v8i16 MQPR:$val1))), - (v8i16 ( MVE_VCLZs16 (v8i16 MQPR:$val1)))>; - - def : Pat<(v16i8 ( int_arm_mve_vcls (v16i8 MQPR:$val1))), - (v16i8 ( MVE_VCLSs8 (v16i8 MQPR:$val1)))>; - def : Pat<(v4i32 ( int_arm_mve_vcls (v4i32 MQPR:$val1))), - (v4i32 ( MVE_VCLSs32 (v4i32 MQPR:$val1)))>; - def : Pat<(v8i16 ( 
int_arm_mve_vcls (v8i16 MQPR:$val1))), - (v8i16 ( MVE_VCLSs16 (v8i16 MQPR:$val1)))>; + let Predicates = [HasMVEInt] in { + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))), + (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>; + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred), + (VTI.Vec MQPR:$inactive))), + (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen, + (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>; + } } +defm MVE_VCLSs8 : MVE_VCLSCLZ_p<"cls", 0, MVE_v16s8, int_arm_mve_vcls>; +defm MVE_VCLSs16 : MVE_VCLSCLZ_p<"cls", 0, MVE_v8s16, int_arm_mve_vcls>; +defm MVE_VCLSs32 : MVE_VCLSCLZ_p<"cls", 0, MVE_v4s32, int_arm_mve_vcls>; + +defm MVE_VCLZs8 : MVE_VCLSCLZ_p<"clz", 1, MVE_v16i8, ctlz>; +defm MVE_VCLZs16 : MVE_VCLSCLZ_p<"clz", 1, MVE_v8i16, ctlz>; +defm MVE_VCLZs32 : MVE_VCLSCLZ_p<"clz", 1, MVE_v4i32, ctlz>; + class MVE_VABSNEG_int size, bit negate, - list pattern=[]> + bit saturate, list pattern=[]> : MVEIntSingleSrc { let Inst{28} = 0b1; let Inst{25-23} = 0b111; let Inst{21-20} = 0b11; - let Inst{17-16} = 0b01; - let Inst{12-8} = 0b00011; + let Inst{17} = 0b0; + let Inst{16} = !eq(saturate, 0); + let Inst{12-11} = 0b00; + let Inst{10} = saturate; + let Inst{9-8} = 0b11; let Inst{7} = negate; let Inst{6} = 0b1; let Inst{4} = 0b0; @@ -2215,61 +2220,40 @@ class MVE_VABSNEG_int size, bit negate, let validForTailPredication = 1; } -def MVE_VABSs8 : MVE_VABSNEG_int<"vabs", "s8", 0b00, 0b0>; -def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>; -def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>; - -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (abs (v16i8 MQPR:$v))), - (v16i8 (MVE_VABSs8 $v))>; - def : Pat<(v8i16 (abs (v8i16 MQPR:$v))), - (v8i16 (MVE_VABSs16 $v))>; - def : Pat<(v4i32 (abs (v4i32 MQPR:$v))), - (v4i32 (MVE_VABSs32 $v))>; -} +multiclass MVE_VABSNEG_int_m { + def "" : MVE_VABSNEG_int; + defvar Inst = !cast(NAME); -def MVE_VNEGs8 : MVE_VABSNEG_int<"vneg", "s8", 0b00, 0b1>; -def MVE_VNEGs16 : 
MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>; -def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>; + let Predicates = [HasMVEInt] in { + // VQABS and VQNEG have more difficult isel patterns defined elsewhere + if !eq(saturate, 0) then { + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>; + } -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))), - (v16i8 (MVE_VNEGs8 $v))>; - def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))), - (v8i16 (MVE_VNEGs16 $v))>; - def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))), - (v4i32 (MVE_VNEGs32 $v))>; + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive))), + (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>; + } } -class MVE_VQABSNEG size, - bit negate, list pattern=[]> - : MVEIntSingleSrc { - - let Inst{28} = 0b1; - let Inst{25-23} = 0b111; - let Inst{21-20} = 0b11; - let Inst{17-16} = 0b00; - let Inst{12-8} = 0b00111; - let Inst{7} = negate; - let Inst{6} = 0b1; - let Inst{4} = 0b0; - let Inst{0} = 0b0; - let validForTailPredication = 1; +foreach VTI = [ MVE_v16s8, MVE_v8s16, MVE_v4s32 ] in { + defm "MVE_VABS" # VTI.Suffix : MVE_VABSNEG_int_m< + "vabs", 0, 0, abs, int_arm_mve_abs_predicated, VTI>; + defm "MVE_VQABS" # VTI.Suffix : MVE_VABSNEG_int_m< + "vqabs", 0, 1, ?, int_arm_mve_qabs_predicated, VTI>; + defm "MVE_VNEG" # VTI.Suffix : MVE_VABSNEG_int_m< + "vneg", 1, 0, vnegq, int_arm_mve_neg_predicated, VTI>; + defm "MVE_VQNEG" # VTI.Suffix : MVE_VABSNEG_int_m< + "vqneg", 1, 1, ?, int_arm_mve_qneg_predicated, VTI>; } -def MVE_VQABSs8 : MVE_VQABSNEG<"vqabs", "s8", 0b00, 0b0>; -def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>; -def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>; - -def MVE_VQNEGs8 : MVE_VQABSNEG<"vqneg", "s8", 0b00, 0b1>; -def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>; -def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>; - // int_min/int_max: vector containing INT_MIN/INT_MAX VTI.Size times 
// zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert multiclass vqabsneg_pattern { + dag zero_vec, MVE_VABSNEG_int vqabs_instruction, + MVE_VABSNEG_int vqneg_instruction> { let Predicates = [HasMVEInt] in { // The below tree can be replaced by a vqabs instruction, as it represents // the following vectorized expression (r being the value in $reg): @@ -2470,7 +2454,7 @@ class MVE_shift_imm sz, bit U, +class MVE_VMOVL sz, bit U, bit top, list pattern=[]> : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm), iname, suffix, "$Qd, $Qm", vpred_r, "", @@ -2480,25 +2464,35 @@ class MVE_VMOVL sz, bit U, let Inst{21} = 0b1; let Inst{20-19} = sz{1-0}; let Inst{18-16} = 0b000; + let Inst{12} = top; let Inst{11-6} = 0b111101; let Inst{4} = 0b0; let Inst{0} = 0b0; } -multiclass MVE_VMOVL_shift_half sz, bit U, - list pattern=[]> { - def bh : MVE_VMOVL { - let Inst{12} = 0b0; - } - def th : MVE_VMOVL { - let Inst{12} = 0b1; - } +multiclass MVE_VMOVL_m { + def "": MVE_VMOVL<"vmovl" # chr, InVTI.Suffix, OutVTI.Size, + InVTI.Unsigned, top>; + defvar Inst = !cast(NAME); + + def : Pat<(OutVTI.Vec (int_arm_mve_vmovl_predicated (InVTI.Vec MQPR:$src), + (i32 InVTI.Unsigned), (i32 top), + (OutVTI.Pred VCCR:$pred), + (OutVTI.Vec MQPR:$inactive))), + (OutVTI.Vec (Inst (InVTI.Vec MQPR:$src), ARMVCCThen, + (OutVTI.Pred VCCR:$pred), + (OutVTI.Vec MQPR:$inactive)))>; } -defm MVE_VMOVLs8 : MVE_VMOVL_shift_half<"vmovl", "s8", 0b01, 0b0>; -defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>; -defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>; -defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>; +defm MVE_VMOVLs8bh : MVE_VMOVL_m<0, "b", MVE_v8s16, MVE_v16s8>; +defm MVE_VMOVLs8th : MVE_VMOVL_m<1, "t", MVE_v8s16, MVE_v16s8>; +defm MVE_VMOVLu8bh : MVE_VMOVL_m<0, "b", MVE_v8u16, MVE_v16u8>; +defm MVE_VMOVLu8th : MVE_VMOVL_m<1, "t", MVE_v8u16, MVE_v16u8>; +defm MVE_VMOVLs16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8s16>; +defm 
MVE_VMOVLs16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8s16>; +defm MVE_VMOVLu16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8u16>; +defm MVE_VMOVLu16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8u16>; let Predicates = [HasMVEInt] in { def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16), @@ -3277,45 +3271,34 @@ class MVE_VRINT op, string suffix, bits<2> size, } -multiclass MVE_VRINT_ops size, list pattern=[]> { - def N : MVE_VRINT<"n", 0b000, suffix, size, pattern>; - def X : MVE_VRINT<"x", 0b001, suffix, size, pattern>; - def A : MVE_VRINT<"a", 0b010, suffix, size, pattern>; - def Z : MVE_VRINT<"z", 0b011, suffix, size, pattern>; - def M : MVE_VRINT<"m", 0b101, suffix, size, pattern>; - def P : MVE_VRINT<"p", 0b111, suffix, size, pattern>; -} +multiclass MVE_VRINT_m opcode, + SDNode unpred_op> { + def "": MVE_VRINT; + defvar Inst = !cast(NAME); + defvar pred_int = !cast("int_arm_mve_vrint"#suffix#"_predicated"); -defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>; -defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>; + let Predicates = [HasMVEFloat] in { + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))), + (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>; + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred), + (VTI.Vec MQPR:$inactive))), + (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen, + (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>; + } +} -let Predicates = [HasMVEFloat] in { - def : Pat<(v4f32 (frint (v4f32 MQPR:$val1))), - (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$val1)))>; - def : Pat<(v8f16 (frint (v8f16 MQPR:$val1))), - (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$val1)))>; - def : Pat<(v4f32 (fround (v4f32 MQPR:$val1))), - (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$val1)))>; - def : Pat<(v8f16 (fround (v8f16 MQPR:$val1))), - (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$val1)))>; - def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$val1))), - (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$val1)))>; - def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$val1))), - (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$val1)))>; - def : Pat<(v4f32 (ffloor 
(v4f32 MQPR:$val1))), - (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$val1)))>; - def : Pat<(v8f16 (ffloor (v8f16 MQPR:$val1))), - (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$val1)))>; - def : Pat<(v4f32 (fceil (v4f32 MQPR:$val1))), - (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>; - def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))), - (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>; - def : Pat<(v4f32 (int_arm_mve_vrintn (v4f32 MQPR:$val1))), - (v4f32 (MVE_VRINTf32N (v4f32 MQPR:$val1)))>; - def : Pat<(v8f16 (int_arm_mve_vrintn (v8f16 MQPR:$val1))), - (v8f16 (MVE_VRINTf16N (v8f16 MQPR:$val1)))>; +multiclass MVE_VRINT_ops { + defm N : MVE_VRINT_m; + defm X : MVE_VRINT_m; + defm A : MVE_VRINT_m; + defm Z : MVE_VRINT_m; + defm M : MVE_VRINT_m; + defm P : MVE_VRINT_m; } +defm MVE_VRINTf16 : MVE_VRINT_ops; +defm MVE_VRINTf32 : MVE_VRINT_ops; + class MVEFloatArithNeon pattern=[]> @@ -3692,7 +3675,7 @@ defm MVE_VCVTu16f16 : MVE_VCVT_fp_int_anpm_multi<"u16.f16", 0b01, 0b1>; defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_multi<"s32.f32", 0b10, 0b0>; defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_multi<"u32.f32", 0b10, 0b1>; -class MVE_VCVT_fp_int size, bits<2> op, +class MVE_VCVT_fp_int size, bit toint, bit unsigned, list pattern=[]> : MVE_float<"vcvt", suffix, (outs MQPR:$Qd), (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> { @@ -3706,41 +3689,43 @@ class MVE_VCVT_fp_int size, bits<2> op, let Inst{17-16} = 0b11; let Inst{15-13} = Qd{2-0}; let Inst{12-9} = 0b0011; - let Inst{8-7} = op; + let Inst{8} = toint; + let Inst{7} = unsigned; let Inst{4} = 0b0; let validForTailPredication = 1; } +multiclass MVE_VCVT_fp_int_m { + defvar Unsigned = !or(!eq(Dest.SuffixLetter,"u"), !eq(Src.SuffixLetter,"u")); + defvar ToInt = !eq(Src.SuffixLetter,"f"); + + def "" : MVE_VCVT_fp_int; + defvar Inst = !cast(NAME); + + let Predicates = [HasMVEFloat] in { + def : Pat<(Dest.Vec (unpred_op (Src.Vec MQPR:$src))), + (Dest.Vec (Inst (Src.Vec MQPR:$src)))>; + def : Pat<(Dest.Vec (int_arm_mve_vcvt_fp_int_predicated + (Src.Vec 
MQPR:$src), (i32 Unsigned), + (Src.Pred VCCR:$mask), (Dest.Vec MQPR:$inactive))), + (Dest.Vec (Inst (Src.Vec MQPR:$src), ARMVCCThen, + (Src.Pred VCCR:$mask), + (Dest.Vec MQPR:$inactive)))>; + } +} // The unsuffixed VCVT for float->int implicitly rounds toward zero, // which I reflect here in the llvm instruction names -def MVE_VCVTs16f16z : MVE_VCVT_fp_int<"s16.f16", 0b01, 0b10>; -def MVE_VCVTu16f16z : MVE_VCVT_fp_int<"u16.f16", 0b01, 0b11>; -def MVE_VCVTs32f32z : MVE_VCVT_fp_int<"s32.f32", 0b10, 0b10>; -def MVE_VCVTu32f32z : MVE_VCVT_fp_int<"u32.f32", 0b10, 0b11>; +defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m; +defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m; +defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m; +defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m; // Whereas VCVT for int->float rounds to nearest -def MVE_VCVTf16s16n : MVE_VCVT_fp_int<"f16.s16", 0b01, 0b00>; -def MVE_VCVTf16u16n : MVE_VCVT_fp_int<"f16.u16", 0b01, 0b01>; -def MVE_VCVTf32s32n : MVE_VCVT_fp_int<"f32.s32", 0b10, 0b00>; -def MVE_VCVTf32u32n : MVE_VCVT_fp_int<"f32.u32", 0b10, 0b01>; - -let Predicates = [HasMVEFloat] in { - def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))), - (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>; - def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))), - (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>; - def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))), - (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>; - def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))), - (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>; - def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))), - (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>; - def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))), - (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>; - def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))), - (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>; - def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))), - (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>; -} +defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m; +defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m; +defm MVE_VCVTf32s32n : 
MVE_VCVT_fp_int_m; +defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m; class MVE_VABSNEG_fp size, bit negate, list pattern=[]> @@ -3761,26 +3746,29 @@ class MVE_VABSNEG_fp size, bit negate, let validForTailPredication = 1; } -def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>; -def MVE_VABSf32 : MVE_VABSNEG_fp<"vabs", "f32", 0b10, 0b0>; - -let Predicates = [HasMVEFloat] in { - def : Pat<(v8f16 (fabs MQPR:$src)), - (MVE_VABSf16 MQPR:$src)>; - def : Pat<(v4f32 (fabs MQPR:$src)), - (MVE_VABSf32 MQPR:$src)>; -} +multiclass MVE_VABSNEG_fp_m { + def "" : MVE_VABSNEG_fp; + defvar Inst = !cast(NAME); -def MVE_VNEGf16 : MVE_VABSNEG_fp<"vneg", "f16", 0b01, 0b1>; -def MVE_VNEGf32 : MVE_VABSNEG_fp<"vneg", "f32", 0b10, 0b1>; + let Predicates = [HasMVEInt] in { + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>; -let Predicates = [HasMVEFloat] in { - def : Pat<(v8f16 (fneg MQPR:$src)), - (MVE_VNEGf16 MQPR:$src)>; - def : Pat<(v4f32 (fneg MQPR:$src)), - (MVE_VNEGf32 MQPR:$src)>; + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive))), + (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>; + } } +defm MVE_VABSf16 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated, + MVE_v8f16, 0>; +defm MVE_VABSf32 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated, + MVE_v4f32, 0>; +defm MVE_VNEGf16 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated, + MVE_v8f16, 1>; +defm MVE_VNEGf32 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated, + MVE_v4f32, 1>; + class MVE_VMAXMINNMA pattern=[]> : MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm), @@ -4427,23 +4415,42 @@ defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>; defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>; def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>; -let Predicates = [HasMVEInt] in { - def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))), - (v8i16 
(MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>; - def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))), - (v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>; - def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))), - (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>; - def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))), - (v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>; - - def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qm), - (v8i16 (ARMvrev32 MQPR:$Qd_src)), (i32 1))), - (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>; - def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qm), - (v16i8 (ARMvrev16 MQPR:$Qd_src)), (i32 1))), - (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>; -} + +multiclass MVE_VMOVN_p { + // Match the most obvious MVEvmovn(a,b,t), which overwrites the odd or even + // lanes of a (depending on t) with the even lanes of b. + def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qd_src), + (VTI.Vec MQPR:$Qm), (i32 top))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>; + + if !eq(top, 0) then { + // If we see MVEvmovn(a,ARMvrev(b),1), that wants to overwrite the odd + // lanes of a with the odd lanes of b. In other words, the lanes we're + // _keeping_ from a are the even ones. So we can flip it round and say that + // this is the same as overwriting the even lanes of b with the even lanes + // of a, i.e. it's a VMOVNB with the operands reversed. + defvar vrev = !cast("ARMvrev" # InVTI.LaneBits); + def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qm), + (VTI.Vec (vrev MQPR:$Qd_src)), (i32 1))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>; + } + + // Match the IR intrinsic for a predicated VMOVN. This regards the Qm input + // as having wider lanes that we're narrowing, instead of already-narrow + // lanes that we're taking every other one of. 
+ def : Pat<(VTI.Vec (int_arm_mve_vmovn_predicated (VTI.Vec MQPR:$Qd_src), + (InVTI.Vec MQPR:$Qm), (i32 top), + (InVTI.Pred VCCR:$pred))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), + (InVTI.Vec MQPR:$Qm), + ARMVCCThen, (InVTI.Pred VCCR:$pred)))>; +} + +defm : MVE_VMOVN_p; +defm : MVE_VMOVN_p; +defm : MVE_VMOVN_p; +defm : MVE_VMOVN_p; class MVE_VCVT_ff @test_vmvnq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vmvnq_m_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr 
p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vnegq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vnegq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vnegt.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vnegq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vnegq_m_f32: +; CHECK: @ %bb.0: @ %entry +; 
CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vnegt.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vnegq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vnegq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vnegt.s8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vnegq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vnegq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vnegt.s16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vnegq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vnegq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vnegt.s32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vabsq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vabsq_m_f16: +; CHECK: 
@ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabst.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vabsq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vabsq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabst.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vabsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vabsq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabst.s8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vabsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vabsq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabst.s16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vabsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: 
test_vabsq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabst.s32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqnegq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vqnegq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqnegt.s8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqnegq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vqnegq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqnegt.s16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vqnegq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vqnegq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqnegt.s32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqabsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext 
%p) { +; CHECK-LABEL: test_vqabsq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqabst.s8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqabsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vqabsq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqabst.s16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vqabsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vqabsq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqabst.s32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) + +declare <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) +declare <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> 
@llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) +declare <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) +declare <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) +declare <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vclzcls-predicated.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vclzcls-predicated.ll new file mode 100644 index 000000000000..1f0f588d8538 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vclzcls-predicated.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vclsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclsq_m_s8: +; CHECK: @ %bb.0: 
@ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclst.s8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vclsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclsq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclst.s16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vclsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclsq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclst.s32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vclzq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclzq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclzt.i8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vclzq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclzq_m_s16: +; 
CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclzt.i16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vclzq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclzq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclzt.i32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vclzq_m_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclzq_m_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclzt.i8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vclzq_m_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclzq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclzt.i16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vclzq_m_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: 
test_vclzq_m_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclzt.i32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) +declare <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt-fp-int.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt-fp-int.ll new file mode 100644 index 000000000000..d3373b3d79c3 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt-fp-int.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_f16_s16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_f16_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.f16.s16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x 
i16> %a, i32 0, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_f16_u16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_f16_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.f16.u16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> %a, i32 1, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_f32_s32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_f32_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.f32.s32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_f32_u32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_f32_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.f32.u32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> %a, i32 1, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_s16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_s16_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.s16.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 
= zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> %a, i32 0, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_s32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_s32_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.s32.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> %a, i32 0, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_u16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_u16_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.u16.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> %a, i32 1, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_u32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_u32_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.u32.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> %a, i32 1, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) + +declare <8 x half> 
@llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x float>) +declare <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half>, i32, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float>, i32, <4 x i1>, <4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll index c9ce38f9d612..fd33dddc685e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll @@ -145,3 +145,200 @@ entry: %1 = zext <4 x i16> %0 to <4 x i32> ret <4 x i32> %1 } + +define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovlbq_m_s8: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovlbt.s8 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovlbq_m_s8: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q0 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovlbt.s8 q2, q0 +; BE-NEXT: vrev64.16 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovlbq_m_s16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovlbt.s16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovlbq_m_s16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q0 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovlbt.s16 q2, q0 +; BE-NEXT: vrev64.32 q0, q2 +; 
BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovlbq_m_u8: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovlbt.u8 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovlbq_m_u8: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q0 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovlbt.u8 q2, q0 +; BE-NEXT: vrev64.16 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovlbq_m_u16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovlbt.u16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovlbq_m_u16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q0 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovlbt.u16 q2, q0 +; BE-NEXT: vrev64.32 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovltq_m_s8: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovltt.s8 q0, q1 
+; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovltq_m_s8: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q0 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovltt.s8 q2, q0 +; BE-NEXT: vrev64.16 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovltq_m_s16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovltt.s16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovltq_m_s16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q0 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovltt.s16 q2, q0 +; BE-NEXT: vrev64.32 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovltq_m_u8: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovltt.u8 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovltq_m_u8: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q0 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovltt.u8 q2, q0 +; BE-NEXT: vrev64.16 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 
+} + +define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovltq_m_u16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovltt.u16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovltq_m_u16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q0 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovltt.u16 q2, q0 +; BE-NEXT: vrev64.32 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8>, i32, i32, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16>, i32, i32, <4 x i1>, <4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll index f16e83a45b6f..391b163f718a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll @@ -166,5 +166,201 @@ entry: ret <8 x i16> %2 } +define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovnbq_m_s16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovnbt.i16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovnbq_m_s16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q1 +; BE-NEXT: vrev64.8 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovnbt.i16 q1, q2 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <16 x i8> 
@llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovnbq_m_s32: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovnbt.i32 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovnbq_m_s32: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q1 +; BE-NEXT: vrev64.16 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovnbt.i32 q1, q2 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovnbq_m_u16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovnbt.i16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovnbq_m_u16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q1 +; BE-NEXT: vrev64.8 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovnbt.i16 q1, q2 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovnbq_m_u32: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovnbt.i32 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovnbq_m_u32: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q1 +; BE-NEXT: vrev64.16 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: 
vmovnbt.i32 q1, q2 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovntq_m_s16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovntt.i16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovntq_m_s16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q1 +; BE-NEXT: vrev64.8 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovntt.i16 q1, q2 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovntq_m_s32: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovntt.i32 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovntq_m_s32: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q1 +; BE-NEXT: vrev64.16 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovntt.i32 q1, q2 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovntq_m_u16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovntt.i16 q0, q1 +; 
LE-NEXT: bx lr +; +; BE-LABEL: test_vmovntq_m_u16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q1 +; BE-NEXT: vrev64.8 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovntt.i16 q1, q2 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovntq_m_u32: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovntt.i32 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovntq_m_u32: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q1 +; BE-NEXT: vrev64.16 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovntt.i32 q1, q2 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + declare <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8>) declare <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16>) +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8>, <8 x i16>, i32, <8 x i1>) +declare <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16>, <4 x i32>, i32, <4 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrev.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrev.ll new file mode 100644 index 000000000000..841291e4a70e --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrev.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vrev16q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev16q_m_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev16t.8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 16, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vrev32q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev32q_m_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev32t.8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 32, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vrev32q_m_i16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev32q_m_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev32t.16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> %a, i32 32, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrev32q_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev32q_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev32t.16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = 
tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> %a, i32 32, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vrev64q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev64q_m_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev64t.8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 64, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vrev64q_m_i16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev64q_m_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev64t.16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> %a, i32 64, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrev64q_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev64q_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev64t.16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> %a, i32 64, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vrev64q_m_i32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev64q_m_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: 
vrev64t.32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> %a, i32 64, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrev64q_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev64q_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev64t.32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> %a, i32 64, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) + +declare <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>) +declare <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half>, i32, <8 x i1>, <8 x half>) +declare <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>) +declare <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float>, i32, <4 x i1>, <4 x float>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrint-predicated.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrint-predicated.ll new file mode 100644 index 000000000000..c24cc87447e8 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrint-predicated.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <8 x half> @test_vrndaq_m_f16(<8 x half> 
%inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndaq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintat.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndaq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndaq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintat.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndmq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndmq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintmt.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndmq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndmq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintmt.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x 
float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndnq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndnq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintnt.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndnq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndnq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintnt.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndpq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndpq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintpt.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndpq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndpq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintpt.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> 
@llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintzt.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintzt.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndxq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndxq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintxt.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndxq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndxq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintxt.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call 
<4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) -- 2.34.1