From 590e4e8dde1e1266e6cfa4776fe289d04de0d021 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer
Date: Wed, 1 Aug 2018 14:43:59 +0000
Subject: [PATCH] [ARM] Armv8.2-A FP16 vector intrinsics tests

Clang support for the Armv8.2-A FP16 vector intrinsics was committed in
rC328277, but this was never followed up, i.e. the LLVM part is still
missing. I've raised PR38404, and this is the first step to address it:
this adds tests for the Armv8.2-A FP16 vector intrinsics, and thus shows
which intrinsics already work and which need further work.

Differential Revision: https://reviews.llvm.org/D50142

llvm-svn: 338568
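As a note on the test pattern used below: each enabled test pairs an IR
function with the instruction sequence FileCheck should see. Once the
missing lowering lands, the disabled (FIXME) conversion tests are expected
to follow the same shape. A minimal sketch, assuming the FP16 VCVT form
gets selected; the function name is hypothetical and the CHECK lines are
not output llc currently produces:

; Hypothetical example (not part of this patch): expected shape of a
; re-enabled sitofp test once the VCVT lowering is implemented.
define <4 x half> @example_vcvt_f16_s16(<4 x i16> %a) {
; CHECK-LABEL: example_vcvt_f16_s16:
; CHECK: vcvt.f16.s16 d0, d0
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = sitofp <4 x i16> %a to <4 x half>
  ret <4 x half> %vcvt.i
}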
---
 .../CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll | 1148 ++++++++++++++++++++
 1 file changed, 1148 insertions(+)
 create mode 100644 llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll

diff --git a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
new file mode 100644
index 0000000..ec18071
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
@@ -0,0 +1,1148 @@
+; RUN: llc -mtriple=arm-eabi -mattr=+v8.2a,+neon,+fullfp16 -float-abi=hard < %s | FileCheck %s
+
+%struct.float16x4x2_t = type { [2 x <4 x half>] }
+%struct.float16x8x2_t = type { [2 x <8 x half>] }
+
+define dso_local <4 x half> @test_vabs_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vabs_f16:
+; CHECK: vabs.f16 d0, d0
+; CHECK-NEXT: bx lr
+entry:
+  %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+  ret <4 x half> %vabs1.i
+}
+
+define dso_local <8 x half> @test_vabsq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vabsq_f16:
+; CHECK: vabs.f16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+  %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+  ret <8 x half> %vabs1.i
+}
+
+define dso_local <4 x i16> @test_vceqz_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vceqz_f16:
+; CHECK: vceq.f16 d0, d0, #0
+; CHECK-NEXT: bx lr
+entry:
+  %0 = fcmp oeq <4 x half> %a, zeroinitializer
+  %vceqz.i = sext <4 x i1> %0 to <4 x i16>
+  ret <4 x i16> %vceqz.i
+}
+
+define dso_local <8 x i16> @test_vceqzq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vceqzq_f16:
+; CHECK: vceq.f16 q0, q0, #0
+; CHECK-NEXT: bx lr
+entry:
+  %0 = fcmp oeq <8 x half> %a, zeroinitializer
+  %vceqz.i = sext <8 x i1> %0 to <8 x i16>
+  ret <8 x i16> %vceqz.i
+}
+
+define dso_local <4 x i16> @test_vcgez_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vcgez_f16:
+; CHECK: vcge.f16 d0, d0, #0
+; CHECK-NEXT: bx lr
+entry:
+  %0 = fcmp oge <4 x half> %a, zeroinitializer
+  %vcgez.i = sext <4 x i1> %0 to <4 x i16>
+  ret <4 x i16> %vcgez.i
+}
+
+define dso_local <8 x i16> @test_vcgezq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vcgezq_f16:
+; CHECK: vcge.f16 q0, q0, #0
+; CHECK-NEXT: bx lr
+entry:
+  %0 = fcmp oge <8 x half> %a, zeroinitializer
+  %vcgez.i = sext <8 x i1> %0 to <8 x i16>
+  ret <8 x i16> %vcgez.i
+}
+
+define dso_local <4 x i16> @test_vcgtz_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vcgtz_f16:
+; CHECK: vcgt.f16 d0, d0, #0
+; CHECK-NEXT: bx lr
+entry:
+  %0 = fcmp ogt <4 x half> %a, zeroinitializer
+  %vcgtz.i = sext <4 x i1> %0 to <4 x i16>
+  ret <4 x i16> %vcgtz.i
+}
+
+define dso_local <8 x i16> @test_vcgtzq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vcgtzq_f16:
+; CHECK: vcgt.f16 q0, q0, #0
+; CHECK-NEXT: bx lr
+entry:
+  %0 = fcmp ogt <8 x half> %a, zeroinitializer
+  %vcgtz.i = sext <8 x i1> %0 to <8 x i16>
+  ret <8 x i16> %vcgtz.i
+}
+
+define dso_local <4 x i16> @test_vclez_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vclez_f16:
+; CHECK: vcle.f16 d0, d0, #0
+; CHECK-NEXT: bx lr
+entry:
+  %0 = fcmp ole <4 x half> %a, zeroinitializer
+  %vclez.i = sext <4 x i1> %0 to <4 x i16>
+  ret <4 x i16> %vclez.i
+}
+
+define dso_local <8 x i16> @test_vclezq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vclezq_f16:
+; CHECK: vcle.f16 q0, q0, #0
+; CHECK-NEXT: bx lr
+entry:
+  %0 = fcmp ole <8 x half> %a, zeroinitializer
+  %vclez.i = sext <8 x i1> %0 to <8 x i16>
+  ret <8 x i16> %vclez.i
+}
+
+define dso_local <4 x i16> @test_vcltz_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vcltz_f16:
+; CHECK: vclt.f16 d0, d0, #0
+; CHECK-NEXT: bx lr
+entry:
+  %0 = fcmp olt <4 x half> %a, zeroinitializer
+  %vcltz.i = sext <4 x i1> %0 to <4 x i16>
+  ret <4 x i16> %vcltz.i
+}
+
+define dso_local <8 x i16> @test_vcltzq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vcltzq_f16:
+; CHECK: vclt.f16 q0, q0, #0
+; CHECK-NEXT: bx lr
+entry:
+  %0 = fcmp olt <8 x half> %a, zeroinitializer
+  %vcltz.i = sext <8 x i1> %0 to <8 x i16>
+  ret <8 x i16> %vcltz.i
+}
+
+; FIXME (PR38404)
+;
+;define dso_local <4 x half> @test_vcvt_f16_s16(<4 x i16> %a) {
+;entry:
+; %vcvt.i = sitofp <4 x i16> %a to <4 x half>
+; ret <4 x half> %vcvt.i
+;}
+;
+;define dso_local <8 x half> @test_vcvtq_f16_s16(<8 x i16> %a) {
+;entry:
+; %vcvt.i = sitofp <8 x i16> %a to <8 x half>
+; ret <8 x half> %vcvt.i
+;}
+
+;define dso_local <4 x half> @test_vcvt_f16_u16(<4 x i16> %a) {
+;entry:
+; %vcvt.i = uitofp <4 x i16> %a to <4 x half>
+; ret <4 x half> %vcvt.i
+;}
+
+;define dso_local <8 x half> @test_vcvtq_f16_u16(<8 x i16> %a) {
+;entry:
+; %vcvt.i = uitofp <8 x i16> %a to <8 x half>
+; ret <8 x half> %vcvt.i
+;}
+
+;define dso_local <4 x i16> @test_vcvt_s16_f16(<4 x half> %a) {
+;entry:
+; %vcvt.i = fptosi <4 x half> %a to <4 x i16>
+; ret <4 x i16> %vcvt.i
+;}
+
+;define dso_local <8 x i16> @test_vcvtq_s16_f16(<8 x half> %a) {
+;entry:
+; %vcvt.i = fptosi <8 x half> %a to <8 x i16>
+; ret <8 x i16> %vcvt.i
+;}
+
+;define dso_local <4 x i16> @test_vcvt_u16_f16(<4 x half> %a) {
+;entry:
+; %vcvt.i = fptoui <4 x half> %a to <4 x i16>
+; ret <4 x i16> %vcvt.i
+;}
+
+;define dso_local <8 x i16> @test_vcvtq_u16_f16(<8 x half> %a) {
+;entry:
+; %vcvt.i = fptoui <8 x half> %a to <8 x i16>
+; ret <8 x i16> %vcvt.i
+;}
+
+;define dso_local <4 x i16> @test_vcvta_s16_f16(<4 x half> %a) {
+;entry:
+; %vcvta_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half> %a)
+; ret <4 x i16> %vcvta_s16_v1.i
+;}
+
+;define dso_local <4 x i16> @test_vcvta_u16_f16(<4 x half> %a) {
+;entry:
+; %vcvta_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half> %a)
+; ret <4 x i16> %vcvta_u16_v1.i
+;}
+
+;define dso_local <8 x i16> @test_vcvtaq_s16_f16(<8 x half> %a) {
+;entry:
+; %vcvtaq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a)
+; ret <8 x i16> %vcvtaq_s16_v1.i
+;}
+
+;define dso_local <4 x i16> @test_vcvtm_s16_f16(<4 x half> %a) {
+;entry:
+; %vcvtm_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half> %a)
+; ret <4 x i16> %vcvtm_s16_v1.i
+;}
+
+;define dso_local <8 x i16> @test_vcvtmq_s16_f16(<8 x half> %a) {
+;entry:
+; %vcvtmq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half> %a)
+; ret <8 x i16> %vcvtmq_s16_v1.i
+;}
+
+;define dso_local <4 x i16> @test_vcvtm_u16_f16(<4 x half> %a) {
+;entry:
+; %vcvtm_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half> %a)
+; ret <4 x i16> %vcvtm_u16_v1.i
+;}
+
+;define dso_local <8 x i16> @test_vcvtmq_u16_f16(<8 x half> %a) {
+;entry:
+; %vcvtmq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half> %a)
+; ret <8 x i16> %vcvtmq_u16_v1.i
+;}
+
+;define dso_local <4 x i16> @test_vcvtn_s16_f16(<4 x half> %a) {
+;entry:
+; %vcvtn_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half> %a)
+; ret <4 x i16> %vcvtn_s16_v1.i
+;}
+
+;define dso_local <8 x i16> @test_vcvtnq_s16_f16(<8 x half> %a) {
+;entry:
+; %vcvtnq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half> %a)
+; ret <8 x i16> %vcvtnq_s16_v1.i
+;}
+
+;define dso_local <4 x i16> @test_vcvtn_u16_f16(<4 x half> %a) {
+;entry:
+; %vcvtn_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half> %a)
+; ret <4 x i16> %vcvtn_u16_v1.i
+;}
+
+;define dso_local <8 x i16> @test_vcvtnq_u16_f16(<8 x half> %a) {
+;entry:
+; %vcvtnq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half> %a)
+; ret <8 x i16> %vcvtnq_u16_v1.i
+;}
+
+;define dso_local <4 x i16> @test_vcvtp_s16_f16(<4 x half> %a) {
+;entry:
+; %vcvtp_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half> %a)
+; ret <4 x i16> %vcvtp_s16_v1.i
+;}
+
+;define dso_local <8 x i16> @test_vcvtpq_s16_f16(<8 x half> %a) {
+;entry:
+; %vcvtpq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half> %a)
+; ret <8 x i16> %vcvtpq_s16_v1.i
+;}
+
+;define dso_local <4 x i16> @test_vcvtp_u16_f16(<4 x half> %a) {
+;entry:
+; %vcvtp_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half> %a)
+; ret <4 x i16> %vcvtp_u16_v1.i
+;}
+
+;define dso_local <8 x i16> @test_vcvtpq_u16_f16(<8 x half> %a) {
+;entry:
+; %vcvtpq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half> %a)
+; ret <8 x i16> %vcvtpq_u16_v1.i
+;}
+
+define dso_local <4 x half> @test_vneg_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vneg_f16:
+; CHECK: vneg.f16 d0, d0
+; CHECK-NEXT: bx lr
+entry:
+  %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
+  ret <4 x half> %sub.i
+}
+
+define dso_local <8 x half> @test_vnegq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vnegq_f16:
+; CHECK: vneg.f16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+  %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
+  ret <8 x half> %sub.i
+}
+
+define dso_local <4 x half> @test_vrecpe_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vrecpe_f16:
+; CHECK: vrecpe.f16 d0, d0
+; CHECK-NEXT: bx lr
+entry:
+  %vrecpe_v1.i = tail call <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half> %a)
+  ret <4 x half> %vrecpe_v1.i
+}
+
+define dso_local <8 x half> @test_vrecpeq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vrecpeq_f16:
+; CHECK: vrecpe.f16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+  %vrecpeq_v1.i = tail call <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half> %a)
+  ret <8 x half> %vrecpeq_v1.i
+}
+
+define dso_local <4 x half> @test_vrnd_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vrnd_f16:
+; CHECK: vrintz.f16 d0, d0
+; CHECK-NEXT: bx lr
+entry:
+  %vrnd_v1.i = tail call <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half> %a)
+  ret <4 x half> %vrnd_v1.i
+}
+
+define dso_local <8 x half> @test_vrndq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vrndq_f16:
+; CHECK: vrintz.f16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+  %vrndq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half> %a)
+  ret <8 x half> %vrndq_v1.i
+}
+
+define dso_local <4 x half> @test_vrnda_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vrnda_f16:
+; CHECK: vrinta.f16 d0, d0
+; CHECK-NEXT: bx lr
+entry:
+  %vrnda_v1.i = tail call <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half> %a)
+  ret <4 x half> %vrnda_v1.i
+}
+
+define dso_local <8 x half> @test_vrndaq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vrndaq_f16:
+; CHECK: vrinta.f16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+  %vrndaq_v1.i = tail call <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half> %a)
+  ret <8 x half> %vrndaq_v1.i
+}
+
+define dso_local <4 x half> @test_vrndm_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vrndm_f16:
+; CHECK: vrintm.f16 d0, d0
+; CHECK-NEXT: bx lr
+entry:
+  %vrndm_v1.i = tail call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> %a)
+  ret <4 x half> %vrndm_v1.i
+}
+
+define dso_local <8 x half> @test_vrndmq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vrndmq_f16:
+; CHECK: vrintm.f16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+  %vrndmq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> %a)
+  ret <8 x half> %vrndmq_v1.i
+}
+
+define dso_local <4 x half> @test_vrndn_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vrndn_f16:
+; CHECK: vrintn.f16 d0, d0
+; CHECK-NEXT: bx lr
+entry:
+  %vrndn_v1.i = tail call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> %a)
+  ret <4 x half> %vrndn_v1.i
+}
+
+define dso_local <8 x half> @test_vrndnq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vrndnq_f16:
+; CHECK: vrintn.f16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+  %vrndnq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> %a)
+  ret <8 x half> %vrndnq_v1.i
+}
+
+define dso_local <4 x half> @test_vrndp_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vrndp_f16:
+; CHECK: vrintp.f16 d0, d0
+; CHECK-NEXT: bx lr
+entry:
+  %vrndp_v1.i = tail call <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half> %a)
+  ret <4 x half> %vrndp_v1.i
+}
+
+define dso_local <8 x half> @test_vrndpq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vrndpq_f16:
+; CHECK: vrintp.f16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+  %vrndpq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half> %a)
+  ret <8 x half> %vrndpq_v1.i
+}
+
+define dso_local <4 x half> @test_vrndx_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vrndx_f16:
+; CHECK: vrintx.f16 d0, d0
+; CHECK-NEXT: bx lr
+entry:
+  %vrndx_v1.i = tail call <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half> %a)
+  ret <4 x half> %vrndx_v1.i
+}
+
+define dso_local <8 x half> @test_vrndxq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vrndxq_f16:
+; CHECK: vrintx.f16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+  %vrndxq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half> %a)
+  ret <8 x half> %vrndxq_v1.i
+}
+
+define dso_local <4 x half> @test_vrsqrte_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vrsqrte_f16:
+; CHECK: vrsqrte.f16 d0, d0
+; CHECK-NEXT: bx lr
+entry:
+  %vrsqrte_v1.i = tail call <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half> %a)
+  ret <4 x half> %vrsqrte_v1.i
+}
+
+define dso_local <8 x half> @test_vrsqrteq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vrsqrteq_f16:
+; CHECK: vrsqrte.f16 q0, q0
+; CHECK-NEXT: bx lr
+entry:
+  %vrsqrteq_v1.i = tail call <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half> %a)
+  ret <8 x half> %vrsqrteq_v1.i
+}
+
+define dso_local <4 x half> @test_vadd_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vadd_f16:
+; CHECK: vadd.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %add.i = fadd <4 x half> %a, %b
+  ret <4 x half> %add.i
+}
+
+define dso_local <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vaddq_f16:
+; CHECK: vadd.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %add.i = fadd <8 x half> %a, %b
+  ret <8 x half> %add.i
+}
+
+define dso_local <4 x half> @test_vabd_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vabd_f16:
+; CHECK: vabd.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %vabd_v2.i = tail call <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %vabd_v2.i
+}
+
+define dso_local <8 x half> @test_vabdq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vabdq_f16:
+; CHECK: vabd.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %vabdq_v2.i = tail call <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half> %a, <8 x half> %b)
+  ret <8 x half> %vabdq_v2.i
+}
+
+define dso_local <4 x i16> @test_vcage_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vcage_f16:
+; CHECK: vacge.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %vcage_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x i16> %vcage_v2.i
+}
+
+define dso_local <8 x i16> @test_vcageq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vcageq_f16:
+; CHECK: vacge.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %vcageq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
+  ret <8 x i16> %vcageq_v2.i
+}
+
+; FIXME (PR38404)
+;
+;define dso_local <4 x i16> @test_vcagt_f16(<4 x half> %a, <4 x half> %b) {
+;entry:
+; %vcagt_v2.i = tail call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
+; ret <4 x i16> %vcagt_v2.i
+;}
+;
+;define dso_local <8 x i16> @test_vcagtq_f16(<8 x half> %a, <8 x half> %b) {
+;entry:
+; %vcagtq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
+; ret <8 x i16> %vcagtq_v2.i
+;}
+
+define dso_local <4 x i16> @test_vcale_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vcale_f16:
+; CHECK: vacge.f16 d0, d1, d0
+; CHECK-NEXT: bx lr
+entry:
+  %vcale_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
+  ret <4 x i16> %vcale_v2.i
+}
+
+define dso_local <8 x i16> @test_vcaleq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vcaleq_f16:
+; CHECK: vacge.f16 q0, q1, q0
+; CHECK-NEXT: bx lr
+entry:
+  %vcaleq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
+  ret <8 x i16> %vcaleq_v2.i
+}
+
+; FIXME (PR38404)
+;
+;define dso_local <4 x i16> @test_vcalt_f16(<4 x half> %a, <4 x half> %b) {
+;entry:
+; %vcalt_v2.i = tail call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
+; ret <4 x i16> %vcalt_v2.i
+;}
+
+;define dso_local <8 x i16> @test_vcaltq_f16(<8 x half> %a, <8 x half> %b) {
+;entry:
+; %vcaltq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
+; ret <8 x i16> %vcaltq_v2.i
+;}
+
+define dso_local <4 x i16> @test_vceq_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vceq_f16:
+; CHECK: vceq.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %cmp.i = fcmp oeq <4 x half> %a, %b
+  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
+  ret <4 x i16> %sext.i
+}
+
+define dso_local <8 x i16> @test_vceqq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vceqq_f16:
+; CHECK: vceq.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %cmp.i = fcmp oeq <8 x half> %a, %b
+  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
+  ret <8 x i16> %sext.i
+}
+
+define dso_local <4 x i16> @test_vcge_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vcge_f16:
+; CHECK: vcge.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %cmp.i = fcmp oge <4 x half> %a, %b
+  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
+  ret <4 x i16> %sext.i
+}
+
+define dso_local <8 x i16> @test_vcgeq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vcgeq_f16:
+; CHECK: vcge.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %cmp.i = fcmp oge <8 x half> %a, %b
+  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
+  ret <8 x i16> %sext.i
+}
+
+define dso_local <4 x i16> @test_vcgt_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vcgt_f16:
+; CHECK: vcgt.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %cmp.i = fcmp ogt <4 x half> %a, %b
+  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
+  ret <4 x i16> %sext.i
+}
+
+define dso_local <8 x i16> @test_vcgtq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vcgtq_f16:
+; CHECK: vcgt.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %cmp.i = fcmp ogt <8 x half> %a, %b
+  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
+  ret <8 x i16> %sext.i
+}
+
+define dso_local <4 x i16> @test_vcle_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vcle_f16:
+; CHECK: vcge.f16 d0, d1, d0
+; CHECK-NEXT: bx lr
+entry:
+  %cmp.i = fcmp ole <4 x half> %a, %b
+  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
+  ret <4 x i16> %sext.i
+}
+
+define dso_local <8 x i16> @test_vcleq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vcleq_f16:
+; CHECK: vcge.f16 q0, q1, q0
+; CHECK-NEXT: bx lr
+entry:
+  %cmp.i = fcmp ole <8 x half> %a, %b
+  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
+  ret <8 x i16> %sext.i
+}
+
+define dso_local <4 x i16> @test_vclt_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vclt_f16:
+; CHECK: vcgt.f16 d0, d1, d0
+; CHECK-NEXT: bx lr
+entry:
+  %cmp.i = fcmp olt <4 x half> %a, %b
+  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
+  ret <4 x i16> %sext.i
+}
+
+define dso_local <8 x i16> @test_vcltq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vcltq_f16:
+; CHECK: vcgt.f16 q0, q1, q0
+; CHECK-NEXT: bx lr
+entry:
+  %cmp.i = fcmp olt <8 x half> %a, %b
+  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
+  ret <8 x i16> %sext.i
+}
+
+define dso_local <4 x half> @test_vcvt_n_f16_s16(<4 x i16> %a) {
+; CHECK-LABEL: test_vcvt_n_f16_s16:
+; CHECK: vcvt.f16.s16 d0, d0, #2
+; CHECK-NEXT: bx lr
+entry:
+  %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
+  ret <4 x half> %vcvt_n1
+}
+
+declare <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16>, i32)
+
+define dso_local <8 x half> @test_vcvtq_n_f16_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vcvtq_n_f16_s16:
+; CHECK: vcvt.f16.s16 q0, q0, #2
+; CHECK-NEXT: bx lr
+entry:
+  %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
+  ret <8 x half> %vcvt_n1
+}
+
+declare <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16>, i32)
+
+define dso_local <4 x half> @test_vcvt_n_f16_u16(<4 x i16> %a) {
+; CHECK-LABEL: test_vcvt_n_f16_u16:
+; CHECK: vcvt.f16.u16 d0, d0, #2
+; CHECK-NEXT: bx lr
+entry:
+  %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
+  ret <4 x half> %vcvt_n1
+}
+
+declare <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16>, i32)
+
+define dso_local <8 x half> @test_vcvtq_n_f16_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vcvtq_n_f16_u16:
+; CHECK: vcvt.f16.u16 q0, q0, #2
+; CHECK-NEXT: bx lr
+entry:
+  %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
+  ret <8 x half> %vcvt_n1
+}
+
+declare <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16>, i32)
+
+define dso_local <4 x i16> @test_vcvt_n_s16_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vcvt_n_s16_f16:
+; CHECK: vcvt.s16.f16 d0, d0, #2
+; CHECK-NEXT: bx lr
+entry:
+  %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %a, i32 2)
+  ret <4 x i16> %vcvt_n1
+}
+
+declare <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half>, i32)
+
+define dso_local <8 x i16> @test_vcvtq_n_s16_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vcvtq_n_s16_f16:
+; CHECK: vcvt.s16.f16 q0, q0, #2
+; CHECK-NEXT: bx lr
+entry:
+  %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %a, i32 2)
+  ret <8 x i16> %vcvt_n1
+}
+
+declare <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half>, i32)
+
+define dso_local <4 x i16> @test_vcvt_n_u16_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vcvt_n_u16_f16:
+; CHECK: vcvt.u16.f16 d0, d0, #2
+; CHECK-NEXT: bx lr
+entry:
+  %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %a, i32 2)
+  ret <4 x i16> %vcvt_n1
+}
+
+declare <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half>, i32)
+
+define dso_local <8 x i16> @test_vcvtq_n_u16_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vcvtq_n_u16_f16:
+; CHECK: vcvt.u16.f16 q0, q0, #2
+; CHECK-NEXT: bx lr
+entry:
+  %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %a, i32 2)
+  ret <8 x i16> %vcvt_n1
+}
+
+declare <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half>, i32)
+
+define dso_local <4 x half> @test_vmax_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vmax_f16:
+; CHECK: vmax.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %vmax_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %vmax_v2.i
+}
+
+define dso_local <8 x half> @test_vmaxq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vmaxq_f16:
+; CHECK: vmax.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %vmaxq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half> %a, <8 x half> %b)
+  ret <8 x half> %vmaxq_v2.i
+}
+
+; FIXME (PR38404)
+;
+;define dso_local <4 x half> @test_vmaxnm_f16(<4 x half> %a, <4 x half> %b) {
+;entry:
+; %vmaxnm_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
+; ret <4 x half> %vmaxnm_v2.i
+;}
+
+;define dso_local <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) {
+;entry:
+; %vmaxnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
+; ret <8 x half> %vmaxnmq_v2.i
+;}
+
+;define dso_local <4 x half> @test_vmin_f16(<4 x half> %a, <4 x half> %b) {
+;entry:
+; %vmin_v2.i = tail call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b)
+; ret <4 x half> %vmin_v2.i
+;}
+
+;define dso_local <8 x half> @test_vminq_f16(<8 x half> %a, <8 x half> %b) {
+;entry:
+; %vminq_v2.i = tail call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b)
+; ret <8 x half> %vminq_v2.i
+;}
+
+;define dso_local <4 x half> @test_vminnm_f16(<4 x half> %a, <4 x half> %b) {
+;entry:
+; %vminnm_v2.i = tail call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b)
+; ret <4 x half> %vminnm_v2.i
+;}
+
+;define dso_local <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) {
+;entry:
+; %vminnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b)
+; ret <8 x half> %vminnmq_v2.i
+;}
+
+define dso_local <4 x half> @test_vmul_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vmul_f16:
+; CHECK: vmul.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %mul.i = fmul <4 x half> %a, %b
+  ret <4 x half> %mul.i
+}
+
+define dso_local <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vmulq_f16:
+; CHECK: vmul.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %mul.i = fmul <8 x half> %a, %b
+  ret <8 x half> %mul.i
+}
+
+define dso_local <4 x half> @test_vpadd_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vpadd_f16:
+; CHECK: vpadd.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %vpadd_v2.i = tail call <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %vpadd_v2.i
+}
+
+define dso_local <4 x half> @test_vpmax_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vpmax_f16:
+; CHECK: vpmax.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %vpmax_v2.i = tail call <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %vpmax_v2.i
+}
+
+define dso_local <4 x half> @test_vpmin_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vpmin_f16:
+; CHECK: vpmin.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %vpmin_v2.i = tail call <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %vpmin_v2.i
+}
+
+define dso_local <4 x half> @test_vrecps_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vrecps_f16:
+; CHECK: vrecps.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %vrecps_v2.i = tail call <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %vrecps_v2.i
+}
+
+define dso_local <8 x half> @test_vrecpsq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vrecpsq_f16:
+; CHECK: vrecps.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %vrecpsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half> %a, <8 x half> %b)
+  ret <8 x half> %vrecpsq_v2.i
+}
+
+define dso_local <4 x half> @test_vrsqrts_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vrsqrts_f16:
+; CHECK: vrsqrts.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %vrsqrts_v2.i = tail call <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %vrsqrts_v2.i
+}
+
+define dso_local <8 x half> @test_vrsqrtsq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vrsqrtsq_f16:
+; CHECK: vrsqrts.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %vrsqrtsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half> %a, <8 x half> %b)
+  ret <8 x half> %vrsqrtsq_v2.i
+}
+
+define dso_local <4 x half> @test_vsub_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vsub_f16:
+; CHECK: vsub.f16 d0, d0, d1
+; CHECK-NEXT: bx lr
+entry:
+  %sub.i = fsub <4 x half> %a, %b
+  ret <4 x half> %sub.i
+}
+
+define dso_local <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vsubq_f16:
+; CHECK: vsub.f16 q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+  %sub.i = fsub <8 x half> %a, %b
+  ret <8 x half> %sub.i
+}
+
+; FIXME (PR38404)
+;
+;define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+;entry:
+; %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
+; ret <4 x half> %0
+;}
+
+;define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+;entry:
+; %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
+; ret <8 x half> %0
+;}
+
+;define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+;entry:
+; %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+; %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a)
+; ret <4 x half> %0
+;}
+
+;define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+;entry:
+; %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+; %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a)
+; ret <8 x half> %0
+;}
+
+;define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
+;entry:
+; %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; %mul = fmul <4 x half> %shuffle, %a
+; ret <4 x half> %mul
+;}
+
+;define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) {
+;entry:
+; %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; %mul = fmul <8 x half> %shuffle, %a
+; ret <8 x half> %mul
+;}
+
+;define dso_local <4 x half> @test_vmul_n_f16(<4 x half> %a, float %b.coerce) {
+;entry:
+; %0 = bitcast float %b.coerce to i32
+; %tmp.0.extract.trunc = trunc i32 %0 to i16
+; %1 = bitcast i16 %tmp.0.extract.trunc to half
+; %vecinit = insertelement <4 x half> undef, half %1, i32 0
+; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
+; %mul = fmul <4 x half> %vecinit4, %a
+; ret <4 x half> %mul
+;}
+
+;define dso_local <8 x half> @test_vmulq_n_f16(<8 x half> %a, float %b.coerce) {
+;entry:
+; %0 = bitcast float %b.coerce to i32
+; %tmp.0.extract.trunc = trunc i32 %0 to i16
+; %1 = bitcast i16 %tmp.0.extract.trunc to half
+; %vecinit = insertelement <8 x half> undef, half %1, i32 0
+; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
+; %mul = fmul <8 x half> %vecinit8, %a
+; ret <8 x half> %mul
+;}
+
+define dso_local <4 x half> @test_vbsl_f16(<4 x i16> %a, <4 x half> %b, <4 x half> %c) {
+; CHECK-LABEL: test_vbsl_f16:
+; CHECK: vbsl d0, d1, d2
+; CHECK-NEXT: bx lr
+entry:
+  %0 = bitcast <4 x i16> %a to <8 x i8>
+  %1 = bitcast <4 x half> %b to <8 x i8>
+  %2 = bitcast <4 x half> %c to <8 x i8>
+  %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2)
+  %3 = bitcast <8 x i8> %vbsl_v.i to <4 x half>
+  ret <4 x half> %3
+}
+
+define dso_local <8 x half> @test_vbslq_f16(<8 x i16> %a, <8 x half> %b, <8 x half> %c) {
+; CHECK-LABEL: test_vbslq_f16:
+; CHECK: vbsl q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+  %0 = bitcast <8 x i16> %a to <16 x i8>
+  %1 = bitcast <8 x half> %b to <16 x i8>
+  %2 = bitcast <8 x half> %c to <16 x i8>
+  %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
+  %3 = bitcast <16 x i8> %vbslq_v.i to <8 x half>
+  ret <8 x half> %3
+}
+
+; FIXME (PR38404)
+;
+;define dso_local %struct.float16x4x2_t @test_vzip_f16(<4 x half> %a, <4 x half> %b) {
+;entry:
+; %vzip.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+; %vzip1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+; %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vzip.i, 0, 0
+; %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vzip1.i, 0, 1
+; ret %struct.float16x4x2_t %.fca.0.1.insert
+;}
+;
+;define dso_local %struct.float16x8x2_t @test_vzipq_f16(<8 x half> %a, <8 x half> %b) {
+;entry:
+; %vzip.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+; %vzip1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vzip.i, 0, 0
+; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vzip1.i, 0, 1
+; ret %struct.float16x8x2_t %.fca.0.1.insert
+;}
+;
+;define dso_local %struct.float16x4x2_t @test_vuzp_f16(<4 x half> %a, <4 x half> %b) {
+;entry:
+; %vuzp.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; %vuzp1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vuzp.i, 0, 0
+; %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vuzp1.i, 0, 1
+; ret %struct.float16x4x2_t %.fca.0.1.insert
+;}
+;
+;define dso_local %struct.float16x8x2_t @test_vuzpq_f16(<8 x half> %a, <8 x half> %b) {
+;entry:
+; %vuzp.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; %vuzp1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vuzp.i, 0, 0
+; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vuzp1.i, 0, 1
+; ret %struct.float16x8x2_t %.fca.0.1.insert
+;}
+;
+;define dso_local %struct.float16x4x2_t @test_vtrn_f16(<4 x half> %a, <4 x half> %b) {
+;entry:
+; %vtrn.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; %vtrn1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vtrn.i, 0, 0
+; %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vtrn1.i, 0, 1
+; ret %struct.float16x4x2_t %.fca.0.1.insert
+;}
+;
+;define dso_local %struct.float16x8x2_t @test_vtrnq_f16(<8 x half> %a, <8 x half> %b) {
+;entry:
+; %vtrn.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+; %vtrn1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+; %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vtrn.i, 0, 0
+; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1
+; ret %struct.float16x8x2_t %.fca.0.1.insert
+;}
+;
+;define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) {
+;entry:
+; %0 = bitcast float %a.coerce to i32
+; %tmp.0.extract.trunc = trunc i32 %0 to i16
+; %1 = bitcast i16 %tmp.0.extract.trunc to half
+; %vecinit = insertelement <4 x half> undef, half %1, i32 0
+; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
+; ret <4 x half> %vecinit4
+;}
+;
+;define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) {
+;entry:
+; %0 = bitcast float %a.coerce to i32
+; %tmp.0.extract.trunc = trunc i32 %0 to i16
+; %1 = bitcast i16 %tmp.0.extract.trunc to half
+; %vecinit = insertelement <8 x half> undef, half %1, i32 0
+; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
+; ret <8 x half> %vecinit8
+;}
+;
+;define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) {
+;entry:
+; %0 = bitcast float %a.coerce to i32
+; %tmp.0.extract.trunc = trunc i32 %0 to i16
+; %1 = bitcast i16 %tmp.0.extract.trunc to half
+; %vecinit = insertelement <4 x half> undef, half %1, i32 0
+; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
+; ret <4 x half> %vecinit4
+;}
+;
+;define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) {
+;entry:
+; %0 = bitcast float %a.coerce to i32
+; %tmp.0.extract.trunc = trunc i32 %0 to i16
+; %1 = bitcast i16 %tmp.0.extract.trunc to half
+; %vecinit = insertelement <8 x half> undef, half %1, i32 0
+; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
+; ret <8 x half> %vecinit8
+;}
+;
+;define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) {
+;entry:
+; %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; ret <4 x half> %shuffle
+;}
+;
+;define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) {
+;entry:
+; %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; ret <8 x half> %shuffle
+;}
+;
+;define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
+;entry:
+; %vext = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; ret <4 x half> %vext
+;}
+;
+;define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) {
+;entry:
+; %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+; ret <8 x half> %vext
+;}
+;
+;define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) {
+;entry:
+; %shuffle.i = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; ret <4 x half> %shuffle.i
+;}
+;
+;define dso_local <8 x half> @test_vrev64q_f16(<8 x half> %a) {
+;entry:
+; %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; ret <8 x half> %shuffle.i
+;}
+
+declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
+declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+declare <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half>)
+declare <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half>)
+declare <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half>)
+declare <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half>)
+declare <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half>)
+declare <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half>)
+declare <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half>)
+declare <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half>)
+declare <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half>)
+declare <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half>)
+declare <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half>)
+declare <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half>)
+declare <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half>)
+declare <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half>)
+declare <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half>)
+declare <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half>)
+declare <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half>)
+declare <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half>)
+declare <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half>)
+declare <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half>)
+declare <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half>)
+declare <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half>)
+declare <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half>)
+declare <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half>)
+declare <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half>)
+declare <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half>)
+declare <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half>)
+declare <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half>)
+declare <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half>)
+declare <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half>)
+declare <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half>)
+declare <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half>, <4 x half>)
+declare <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half>, <8 x half>)
+declare <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half>, <4 x half>)
+declare <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half>, <8 x half>)
+declare <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half>, <4 x half>)
+declare <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half>, <4 x half>)
+declare <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half>, <4 x half>)
+declare <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half>, <4 x half>)
+declare <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half>, <4 x half>)
+declare <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half>, <4 x half>)
+declare <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half>, <4 x half>)
+declare <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half>, <4 x half>)
+declare <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half>, <4 x half>)
+declare <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half>, <4 x half>)
+declare <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half>, <8 x half>)
+declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
+declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
+declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
+declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
--
2.7.4