From c9eaed514929f841d70d685a183658294e70a0df Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 28 Mar 2020 16:22:05 +0000 Subject: [PATCH] [ARM] MVE VMOV.i64 In the original batch of MVE VMOVimm code generation, VMOV.i64 was left out due to the way it was done downstream. It turns out that it's fairly simple though. This adds the codegen for it, similar to NEON. Big-endian is technically incorrect in this version, which John is fixing in a NEON patch. --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +- llvm/lib/Target/ARM/ARMInstrMVE.td | 2 + llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll | 10 +- llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll | 74 ++++-------- llvm/test/CodeGen/Thumb2/mve-sext.ll | 10 +- llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll | 100 ++-------------- llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll | 66 ++--------- llvm/test/CodeGen/Thumb2/mve-vmovimm.ll | 162 +++----------------------- 8 files changed, 60 insertions(+), 366 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 1c9d758..c1ebaef 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -7181,7 +7181,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, return DAG.getUNDEF(VT); if ((ST->hasNEON() && SplatBitSize <= 64) || - (ST->hasMVEIntegerOps() && SplatBitSize <= 32)) { + (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) { // Check if an immediate VMOV works. 
EVT VmovVT; SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(), diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index bd12c4a5..22374bc 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2389,6 +2389,8 @@ let Predicates = [HasMVEInt] in { (v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>; def : Pat<(v4i32 (ARMvmovImm timm:$simm)), (v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>; + def : Pat<(v2i64 (ARMvmovImm timm:$simm)), + (v2i64 (MVE_VMOVimmi64 nImmSplatI64:$simm))>; def : Pat<(v8i16 (ARMvmvnImm timm:$simm)), (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>; diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll b/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll index e359388..c44668c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll @@ -251,21 +251,13 @@ define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_zext(<2 x i16*>* %offptr) { ; CHECK-LABEL: ptr_v2i16_zext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: ldrd r1, r0, [r0] -; CHECK-NEXT: adr r2, .LCPI9_0 +; CHECK-NEXT: vmov.i64 q0, #0xffff ; CHECK-NEXT: ldrh r0, [r0] -; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: ldrh r1, [r1] ; CHECK-NEXT: vmov.32 q1[0], r1 ; CHECK-NEXT: vmov.32 q1[2], r0 ; CHECK-NEXT: vand q0, q1, q0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI9_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %offs = load <2 x i16*>, <2 x i16*>* %offptr, align 4 %gather = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> %offs, i32 2, <2 x i1> , <2 x i16> undef) diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll index 3b49649..3814b0b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll @@ -408,6 +408,7 @@ define void @foo_zext_v2i64_v2i32(<2 x 
i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-LE-NEXT: ldrd lr, r12, [r1] ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: @ implicit-def: $q1 +; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff ; CHECK-LE-NEXT: rsbs.w r3, lr, #0 ; CHECK-LE-NEXT: vmov.32 q0[0], lr ; CHECK-LE-NEXT: sbcs.w r3, r1, lr, asr #31 @@ -424,23 +425,21 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-LE-NEXT: bfi r1, lr, #0, #1 ; CHECK-LE-NEXT: vmov.32 q0[2], r12 ; CHECK-LE-NEXT: and r3, r1, #3 -; CHECK-LE-NEXT: adr.w r12, .LCPI7_0 +; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: lsls r1, r1, #31 ; CHECK-LE-NEXT: itt ne ; CHECK-LE-NEXT: ldrne r1, [r2] ; CHECK-LE-NEXT: vmovne.32 q1[0], r1 ; CHECK-LE-NEXT: lsls r1, r3, #30 -; CHECK-LE-NEXT: vmov r3, s0 ; CHECK-LE-NEXT: itt mi ; CHECK-LE-NEXT: ldrmi r1, [r2, #4] ; CHECK-LE-NEXT: vmovmi.32 q1[2], r1 +; CHECK-LE-NEXT: vmov r1, s0 ; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vldrw.u32 q2, [r12] -; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: vand q1, q1, q2 -; CHECK-LE-NEXT: rsbs r1, r3, #0 -; CHECK-LE-NEXT: sbcs.w r1, r2, r3, asr #31 +; CHECK-LE-NEXT: rsbs r3, r1, #0 ; CHECK-LE-NEXT: vmov r3, s2 +; CHECK-LE-NEXT: sbcs.w r1, r2, r1, asr #31 ; CHECK-LE-NEXT: it lt ; CHECK-LE-NEXT: movlt.w r12, #1 ; CHECK-LE-NEXT: rsbs r1, r3, #0 @@ -460,13 +459,6 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-LE-NEXT: vstrmi d3, [r0, #8] ; CHECK-LE-NEXT: add sp, #4 ; CHECK-LE-NEXT: pop {r7, pc} -; CHECK-LE-NEXT: .p2align 4 -; CHECK-LE-NEXT: @ %bb.1: -; CHECK-LE-NEXT: .LCPI7_0: -; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-LE-NEXT: .long 0 @ 0x0 -; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-LE-NEXT: .long 0 @ 0x0 ; ; CHECK-BE-LABEL: foo_zext_v2i64_v2i32: ; CHECK-BE: @ %bb.0: @ %entry @@ -511,15 +503,13 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-BE-NEXT: .LBB7_4: @ %else2 ; CHECK-BE-NEXT: vrev64.32 q3, q2 ; 
CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vmov r3, s15 -; CHECK-BE-NEXT: adr.w r12, .LCPI7_0 -; CHECK-BE-NEXT: vldrb.u8 q0, [r12] +; CHECK-BE-NEXT: vmov r1, s15 ; CHECK-BE-NEXT: mov.w r12, #0 -; CHECK-BE-NEXT: vrev64.8 q2, q0 -; CHECK-BE-NEXT: vand q0, q1, q2 -; CHECK-BE-NEXT: rsbs r1, r3, #0 -; CHECK-BE-NEXT: sbcs.w r1, r2, r3, asr #31 +; CHECK-BE-NEXT: vmov.i64 q0, #0xffffffff +; CHECK-BE-NEXT: vand q0, q1, q0 +; CHECK-BE-NEXT: rsbs r3, r1, #0 ; CHECK-BE-NEXT: vmov r3, s13 +; CHECK-BE-NEXT: sbcs.w r1, r2, r1, asr #31 ; CHECK-BE-NEXT: it lt ; CHECK-BE-NEXT: movlt.w r12, #1 ; CHECK-BE-NEXT: rsbs r1, r3, #0 @@ -539,13 +529,6 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-BE-NEXT: vstrmi d1, [r0, #8] ; CHECK-BE-NEXT: add sp, #4 ; CHECK-BE-NEXT: pop {r7, pc} -; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: @ %bb.5: -; CHECK-BE-NEXT: .LCPI7_0: -; CHECK-BE-NEXT: .long 0 @ 0x0 -; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-BE-NEXT: .long 0 @ 0x0 -; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff entry: %0 = load <2 x i32>, <2 x i32>* %mask, align 4 %1 = icmp sgt <2 x i32> %0, zeroinitializer @@ -565,6 +548,7 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-LE-NEXT: ldrd lr, r12, [r1] ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: @ implicit-def: $q1 +; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff ; CHECK-LE-NEXT: rsbs.w r3, lr, #0 ; CHECK-LE-NEXT: vmov.32 q0[0], lr ; CHECK-LE-NEXT: sbcs.w r3, r1, lr, asr #31 @@ -581,23 +565,21 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-LE-NEXT: bfi r1, lr, #0, #1 ; CHECK-LE-NEXT: vmov.32 q0[2], r12 ; CHECK-LE-NEXT: and r3, r1, #3 -; CHECK-LE-NEXT: adr.w r12, .LCPI8_0 +; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: lsls r1, r1, #31 ; CHECK-LE-NEXT: itt ne ; CHECK-LE-NEXT: ldrne r1, [r2] ; CHECK-LE-NEXT: vmovne.32 q1[0], r1 ; CHECK-LE-NEXT: lsls r1, r3, #30 -; CHECK-LE-NEXT: vmov r3, s0 ; CHECK-LE-NEXT: 
itt mi ; CHECK-LE-NEXT: ldrmi r1, [r2, #4] ; CHECK-LE-NEXT: vmovmi.32 q1[2], r1 +; CHECK-LE-NEXT: vmov r1, s0 ; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vldrw.u32 q2, [r12] -; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: vand q1, q1, q2 -; CHECK-LE-NEXT: rsbs r1, r3, #0 -; CHECK-LE-NEXT: sbcs.w r1, r2, r3, asr #31 +; CHECK-LE-NEXT: rsbs r3, r1, #0 ; CHECK-LE-NEXT: vmov r3, s2 +; CHECK-LE-NEXT: sbcs.w r1, r2, r1, asr #31 ; CHECK-LE-NEXT: it lt ; CHECK-LE-NEXT: movlt.w r12, #1 ; CHECK-LE-NEXT: rsbs r1, r3, #0 @@ -619,13 +601,6 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-LE-NEXT: strdmi r1, r2, [r0, #8] ; CHECK-LE-NEXT: add sp, #4 ; CHECK-LE-NEXT: pop {r7, pc} -; CHECK-LE-NEXT: .p2align 4 -; CHECK-LE-NEXT: @ %bb.1: -; CHECK-LE-NEXT: .LCPI8_0: -; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-LE-NEXT: .long 0 @ 0x0 -; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-LE-NEXT: .long 0 @ 0x0 ; ; CHECK-BE-LABEL: foo_zext_v2i64_v2i32_unaligned: ; CHECK-BE: @ %bb.0: @ %entry @@ -670,15 +645,13 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-BE-NEXT: .LBB8_4: @ %else2 ; CHECK-BE-NEXT: vrev64.32 q3, q2 ; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vmov r3, s15 -; CHECK-BE-NEXT: adr.w r12, .LCPI8_0 -; CHECK-BE-NEXT: vldrb.u8 q0, [r12] +; CHECK-BE-NEXT: vmov r1, s15 ; CHECK-BE-NEXT: mov.w r12, #0 -; CHECK-BE-NEXT: vrev64.8 q2, q0 -; CHECK-BE-NEXT: vand q0, q1, q2 -; CHECK-BE-NEXT: rsbs r1, r3, #0 -; CHECK-BE-NEXT: sbcs.w r1, r2, r3, asr #31 +; CHECK-BE-NEXT: vmov.i64 q0, #0xffffffff +; CHECK-BE-NEXT: vand q0, q1, q0 +; CHECK-BE-NEXT: rsbs r3, r1, #0 ; CHECK-BE-NEXT: vmov r3, s13 +; CHECK-BE-NEXT: sbcs.w r1, r2, r1, asr #31 ; CHECK-BE-NEXT: it lt ; CHECK-BE-NEXT: movlt.w r12, #1 ; CHECK-BE-NEXT: rsbs r1, r3, #0 @@ -700,13 +673,6 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-BE-NEXT: strdmi r2, r1, [r0, #8] ; CHECK-BE-NEXT: add 
sp, #4 ; CHECK-BE-NEXT: pop {r7, pc} -; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: @ %bb.5: -; CHECK-BE-NEXT: .LCPI8_0: -; CHECK-BE-NEXT: .long 0 @ 0x0 -; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-BE-NEXT: .long 0 @ 0x0 -; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff entry: %0 = load <2 x i32>, <2 x i32>* %mask, align 4 %1 = icmp sgt <2 x i32> %0, zeroinitializer diff --git a/llvm/test/CodeGen/Thumb2/mve-sext.ll b/llvm/test/CodeGen/Thumb2/mve-sext.ll index ffade88..f351e6d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-sext.ll +++ b/llvm/test/CodeGen/Thumb2/mve-sext.ll @@ -430,17 +430,9 @@ entry: define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) { ; CHECK-LABEL: zext_v2i32_v2i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI20_0 -; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmov.i64 q1, #0xffffffff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI20_0: -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %0 = zext <2 x i32> %src to <2 x i64> ret <2 x i64> %0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll index ced01f0..7ea2927 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll @@ -36,8 +36,7 @@ entry: define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x) { ; CHECK-LABEL: add_v2i32_v2i64_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI3_0 -; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmov.i64 q1, #0xffffffff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: vmov r3, s0 @@ -46,13 +45,6 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x) { ; CHECK-NEXT: adds r0, r0, r3 ; CHECK-NEXT: adcs r1, r2 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: 
.LCPI3_0: -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i32> %x to <2 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) @@ -138,11 +130,10 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x) { ; CHECK-LABEL: add_v8i16_v8i64_zext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vmov.32 q2[0], r0 ; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: adr r0, .LCPI10_0 -; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vand q2, q2, q1 ; CHECK-NEXT: vmov r0, s10 ; CHECK-NEXT: vmov r1, s8 @@ -182,13 +173,6 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x) { ; CHECK-NEXT: adds r0, r0, r3 ; CHECK-NEXT: adcs r1, r2 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI10_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <8 x i16> %x to <8 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx) @@ -265,21 +249,13 @@ entry: define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x) { ; CHECK-LABEL: add_v2i16_v2i64_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI12_0 -; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: vmov r1, s3 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI12_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i16> %x to <2 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) @@ -489,11 +465,10 @@ define 
arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x) { ; CHECK-LABEL: add_v16i8_v16i64_zext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.u8 r0, q0[0] +; CHECK-NEXT: vmov.i64 q1, #0xff ; CHECK-NEXT: vmov.32 q2[0], r0 ; CHECK-NEXT: vmov.u8 r0, q0[1] ; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: adr r0, .LCPI23_0 -; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vand q2, q2, q1 ; CHECK-NEXT: vmov r0, s10 ; CHECK-NEXT: vmov r1, s8 @@ -585,13 +560,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x) { ; CHECK-NEXT: adds r0, r0, r3 ; CHECK-NEXT: adcs r1, r2 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI23_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <16 x i8> %x to <16 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx) @@ -736,21 +704,13 @@ entry: define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x) { ; CHECK-LABEL: add_v2i8_v2i64_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI25_0 -; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmov.i64 q1, #0xff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: vmov r1, s3 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI25_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i8> %x to <2 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) @@ -832,8 +792,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, i64 %a) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adr r2, .LCPI31_0 -; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vmov.i64 q1, #0xffffffff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: 
vmov r3, s0 @@ -844,13 +803,6 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, i64 %a) { ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI31_0: -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i32> %x to <2 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) @@ -947,11 +899,10 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) { ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov.u16 r2, q0[0] +; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vmov.32 q2[0], r2 ; CHECK-NEXT: vmov.u16 r2, q0[1] ; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: adr r2, .LCPI38_0 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vand q2, q2, q1 ; CHECK-NEXT: vmov r2, s10 ; CHECK-NEXT: vmov r3, s8 @@ -993,13 +944,6 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) { ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: pop {r4, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI38_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <8 x i16> %x to <8 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx) @@ -1082,8 +1026,7 @@ entry: define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) { ; CHECK-LABEL: add_v2i16_v2i64_acc_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r2, .LCPI40_0 -; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vmov r3, s0 @@ -1092,13 +1035,6 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) { ; CHECK-NEXT: adds r0, r0, r2 ; 
CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI40_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i16> %x to <2 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) @@ -1323,11 +1259,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) { ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov.u8 r2, q0[0] +; CHECK-NEXT: vmov.i64 q1, #0xff ; CHECK-NEXT: vmov.32 q2[0], r2 ; CHECK-NEXT: vmov.u8 r2, q0[1] ; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: adr r2, .LCPI51_0 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vand q2, q2, q1 ; CHECK-NEXT: vmov r2, s10 ; CHECK-NEXT: vmov r3, s8 @@ -1421,13 +1356,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) { ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: pop {r4, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI51_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <16 x i8> %x to <16 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx) @@ -1578,8 +1506,7 @@ entry: define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) { ; CHECK-LABEL: add_v2i8_v2i64_acc_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r2, .LCPI53_0 -; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vmov.i64 q1, #0xff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vmov r3, s0 @@ -1588,13 +1515,6 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) { ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI53_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 
0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i8> %x to <2 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll index 9b6d668..690e2c3 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll @@ -172,8 +172,7 @@ entry: define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: add_v2i16_v2i64_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI12_0 -; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vmov.i64 q2, #0xffff ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vmov r0, s4 @@ -183,13 +182,6 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) { ; CHECK-NEXT: umull r0, r1, r1, r0 ; CHECK-NEXT: umlal r0, r1, r3, r2 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI12_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i16> %x to <2 x i64> %yy = zext <2 x i16> %y to <2 x i64> @@ -507,11 +499,10 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) { ; CHECK-NEXT: vmov.u8 r1, q0[0] ; CHECK-NEXT: vmov.32 q3[0], r0 ; CHECK-NEXT: vmov.u8 r0, q1[1] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: adr r0, .LCPI23_0 -; CHECK-NEXT: vldrw.u32 q2, [r0] ; CHECK-NEXT: vmov.32 q4[0], r1 ; CHECK-NEXT: vmov.u8 r1, q0[1] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.i64 q2, #0xff ; CHECK-NEXT: vmov.32 q4[2], r1 ; CHECK-NEXT: vand q3, q3, q2 ; CHECK-NEXT: vand q4, q4, q2 @@ -703,13 +694,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) { ; CHECK-NEXT: umlal r0, r1, r3, r2 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: 
.p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI23_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <16 x i8> %x to <16 x i64> %yy = zext <16 x i8> %y to <16 x i64> @@ -888,8 +872,7 @@ entry: define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: add_v2i8_v2i64_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI25_0 -; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vmov.i64 q2, #0xff ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vmov r0, s6 @@ -901,13 +884,6 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) { ; CHECK-NEXT: add r0, r2 ; CHECK-NEXT: orrs r1, r3 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI25_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i8> %x to <2 x i64> %yy = zext <2 x i8> %y to <2 x i64> @@ -1162,8 +1138,7 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %y, ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adr r2, .LCPI40_0 -; CHECK-NEXT: vldrw.u32 q2, [r2] +; CHECK-NEXT: vmov.i64 q2, #0xffff ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vmov r2, s4 @@ -1175,13 +1150,6 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %y, ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adc.w r1, r1, lr ; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI40_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i16> %x to <2 x i64> %yy = zext <2 x i16> %y to <2 x i64> @@ -1514,17 +1482,16 @@ define arm_aapcs_vfpcc i64 
@add_v16i8_v16i64_acc_zext(<16 x i8> %x, <16 x i8> %y ; CHECK-NEXT: vmov.u8 r3, q0[0] ; CHECK-NEXT: vmov.32 q3[0], r2 ; CHECK-NEXT: vmov.u8 r2, q1[1] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: adr r2, .LCPI51_0 -; CHECK-NEXT: vldrw.u32 q2, [r2] ; CHECK-NEXT: vmov.32 q4[0], r3 ; CHECK-NEXT: vmov.u8 r3, q0[1] -; CHECK-NEXT: vmov.u8 r4, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.i64 q2, #0xff ; CHECK-NEXT: vmov.32 q4[2], r3 ; CHECK-NEXT: vand q3, q3, q2 ; CHECK-NEXT: vand q4, q4, q2 ; CHECK-NEXT: vmov r2, s14 ; CHECK-NEXT: vmov r3, s18 +; CHECK-NEXT: vmov.u8 r4, q0[2] ; CHECK-NEXT: umull r12, lr, r3, r2 ; CHECK-NEXT: vmov r3, s16 ; CHECK-NEXT: vmov r2, s12 @@ -1712,13 +1679,6 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, <16 x i8> %y ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r4, r5, r7, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI51_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <16 x i8> %x to <16 x i64> %yy = zext <16 x i8> %y to <16 x i64> @@ -1905,8 +1865,7 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %y, i6 ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adr r2, .LCPI53_0 -; CHECK-NEXT: vldrw.u32 q2, [r2] +; CHECK-NEXT: vmov.i64 q2, #0xff ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vmov r2, s6 @@ -1920,13 +1879,6 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %y, i6 ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI53_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i8> %x to <2 x i64> %yy = zext <2 x i8> %y to <2 x 
i64> diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll index 640aabc..2173112 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll @@ -355,30 +355,13 @@ entry: define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff() { ; CHECKLE-LABEL: mov_int64_ff: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI19_0 -; CHECKLE-NEXT: vldrw.u32 q0, [r0] +; CHECKLE-NEXT: vmov.i64 q0, #0xff ; CHECKLE-NEXT: bx lr -; CHECKLE-NEXT: .p2align 4 -; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI19_0: -; CHECKLE-NEXT: .long 255 @ double 1.2598673968951787E-321 -; CHECKLE-NEXT: .long 0 -; CHECKLE-NEXT: .long 255 @ double 1.2598673968951787E-321 -; CHECKLE-NEXT: .long 0 ; ; CHECKBE-LABEL: mov_int64_ff: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI19_0 -; CHECKBE-NEXT: vldrb.u8 q1, [r0] -; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: vmov.i64 q0, #0xff00000000 ; CHECKBE-NEXT: bx lr -; CHECKBE-NEXT: .p2align 4 -; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI19_0: -; CHECKBE-NEXT: .long 0 @ double 1.2598673968951787E-321 -; CHECKBE-NEXT: .long 255 -; CHECKBE-NEXT: .long 0 @ double 1.2598673968951787E-321 -; CHECKBE-NEXT: .long 255 entry: ret <2 x i64> < i64 255, i64 255 > } @@ -401,30 +384,13 @@ entry: define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff0000ff0000ffff() { ; CHECKLE-LABEL: mov_int64_ff0000ff0000ffff: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI21_0 -; CHECKLE-NEXT: vldrw.u32 q0, [r0] +; CHECKLE-NEXT: vmov.i64 q0, #0xff0000ff0000ffff ; CHECKLE-NEXT: bx lr -; CHECKLE-NEXT: .p2align 4 -; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI21_0: -; CHECKLE-NEXT: .long 65535 @ double -5.4874582226568829E+303 -; CHECKLE-NEXT: .long 4278190335 -; CHECKLE-NEXT: .long 65535 @ double -5.4874582226568829E+303 -; CHECKLE-NEXT: .long 4278190335 ; ; CHECKBE-LABEL: mov_int64_ff0000ff0000ffff: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI21_0 -; CHECKBE-NEXT: vldrb.u8 q1, 
[r0] -; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: vmov.i64 q0, #0xffffff0000ff ; CHECKBE-NEXT: bx lr -; CHECKBE-NEXT: .p2align 4 -; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI21_0: -; CHECKBE-NEXT: .long 4278190335 @ double -5.4874582226568829E+303 -; CHECKBE-NEXT: .long 65535 -; CHECKBE-NEXT: .long 4278190335 @ double -5.4874582226568829E+303 -; CHECKBE-NEXT: .long 65535 entry: ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 > } @@ -463,30 +429,13 @@ entry: define arm_aapcs_vfpcc <16 x i8> @mov_int64_0f000f0f() { ; CHECKLE-LABEL: mov_int64_0f000f0f: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI23_0 -; CHECKLE-NEXT: vldrw.u32 q0, [r0] +; CHECKLE-NEXT: vmov.i64 q0, #0xff000000ff00ff ; CHECKLE-NEXT: bx lr -; CHECKLE-NEXT: .p2align 4 -; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI23_0: -; CHECKLE-NEXT: .long 16711935 @ double 7.0632744699731897E-304 -; CHECKLE-NEXT: .long 16711680 -; CHECKLE-NEXT: .long 16711935 @ double 7.0632744699731897E-304 -; CHECKLE-NEXT: .long 16711680 ; ; CHECKBE-LABEL: mov_int64_0f000f0f: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI23_0 -; CHECKBE-NEXT: vldrb.u8 q1, [r0] -; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: vmov.i64 q0, #0xff00ff00ff00 ; CHECKBE-NEXT: bx lr -; CHECKBE-NEXT: .p2align 4 -; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI23_0: -; CHECKBE-NEXT: .long 4278255360 @ double -5.8276674374138332E+303 -; CHECKBE-NEXT: .long 65280 -; CHECKBE-NEXT: .long 4278255360 @ double -5.8276674374138332E+303 -; CHECKBE-NEXT: .long 65280 entry: ret <16 x i8> } @@ -494,30 +443,13 @@ entry: define arm_aapcs_vfpcc <8 x i16> @mov_int64_ff00ffff() { ; CHECKLE-LABEL: mov_int64_ff00ffff: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI24_0 -; CHECKLE-NEXT: vldrw.u32 q0, [r0] +; CHECKLE-NEXT: vmov.i64 q0, #0xffffffff0000ffff ; CHECKLE-NEXT: bx lr -; CHECKLE-NEXT: .p2align 4 -; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI24_0: -; CHECKLE-NEXT: .long 65535 @ double NaN -; 
CHECKLE-NEXT: .long 4294967295 -; CHECKLE-NEXT: .long 65535 @ double NaN -; CHECKLE-NEXT: .long 4294967295 ; ; CHECKBE-LABEL: mov_int64_ff00ffff: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI24_0 -; CHECKBE-NEXT: vldrb.u8 q1, [r0] -; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: vmov.i64 q0, #0xffffffffffff0000 ; CHECKBE-NEXT: bx lr -; CHECKBE-NEXT: .p2align 4 -; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI24_0: -; CHECKBE-NEXT: .long 4294901760 @ double NaN -; CHECKBE-NEXT: .long 4294967295 -; CHECKBE-NEXT: .long 4294901760 @ double NaN -; CHECKBE-NEXT: .long 4294967295 entry: ret <8 x i16> } @@ -665,57 +597,18 @@ entry: define arm_aapcs_vfpcc <16 x i8> @test(<16 x i8> %i) { ; CHECKLE-LABEL: test: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI31_0 -; CHECKLE-NEXT: vldrw.u32 q1, [r0] +; CHECKLE-NEXT: vmov.i64 q1, #0xff000000ff00ff ; CHECKLE-NEXT: vorr q0, q0, q1 ; CHECKLE-NEXT: bx lr -; CHECKLE-NEXT: .p2align 4 -; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI31_0: -; CHECKLE-NEXT: .byte 255 @ 0xff -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 255 @ 0xff -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 255 @ 0xff -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 255 @ 0xff -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 255 @ 0xff -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 255 @ 0xff -; CHECKLE-NEXT: .byte 0 @ 0x0 ; ; CHECKBE-LABEL: test: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI31_0 +; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff00ff0000 +; CHECKBE-NEXT: vrev64.8 q2, q1 ; CHECKBE-NEXT: vrev64.8 q1, q0 -; CHECKBE-NEXT: vldrb.u8 q0, [r0] -; CHECKBE-NEXT: vorr q1, q1, q0 +; CHECKBE-NEXT: vorr q1, q1, q2 ; CHECKBE-NEXT: vrev64.8 q0, q1 ; CHECKBE-NEXT: bx lr -; CHECKBE-NEXT: .p2align 4 -; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI31_0: -; CHECKBE-NEXT: .byte 255 @ 0xff -; 
CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 255 @ 0xff -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 255 @ 0xff -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 255 @ 0xff -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 255 @ 0xff -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 255 @ 0xff -; CHECKBE-NEXT: .byte 0 @ 0x0 entry: %o = or <16 x i8> %i, ret <16 x i8> %o @@ -724,41 +617,18 @@ entry: define arm_aapcs_vfpcc <8 x i16> @test2(<8 x i16> %i) { ; CHECKLE-LABEL: test2: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI32_0 -; CHECKLE-NEXT: vldrw.u32 q1, [r0] +; CHECKLE-NEXT: vmov.i64 q1, #0xffffffff0000ffff ; CHECKLE-NEXT: vorr q0, q0, q1 ; CHECKLE-NEXT: bx lr -; CHECKLE-NEXT: .p2align 4 -; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI32_0: -; CHECKLE-NEXT: .short 65535 @ 0xffff -; CHECKLE-NEXT: .short 0 @ 0x0 -; CHECKLE-NEXT: .short 65535 @ 0xffff -; CHECKLE-NEXT: .short 65535 @ 0xffff -; CHECKLE-NEXT: .short 65535 @ 0xffff -; CHECKLE-NEXT: .short 0 @ 0x0 -; CHECKLE-NEXT: .short 65535 @ 0xffff -; CHECKLE-NEXT: .short 65535 @ 0xffff ; ; CHECKBE-LABEL: test2: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI32_0 +; CHECKBE-NEXT: vmov.i64 q1, #0xffffffffffff +; CHECKBE-NEXT: vrev64.16 q2, q1 ; CHECKBE-NEXT: vrev64.16 q1, q0 -; CHECKBE-NEXT: vldrh.u16 q0, [r0] -; CHECKBE-NEXT: vorr q1, q1, q0 +; CHECKBE-NEXT: vorr q1, q1, q2 ; CHECKBE-NEXT: vrev64.16 q0, q1 ; CHECKBE-NEXT: bx lr -; CHECKBE-NEXT: .p2align 4 -; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI32_0: -; CHECKBE-NEXT: .short 65535 @ 0xffff -; CHECKBE-NEXT: .short 0 @ 0x0 -; CHECKBE-NEXT: .short 65535 @ 0xffff -; CHECKBE-NEXT: .short 65535 @ 0xffff -; CHECKBE-NEXT: .short 65535 @ 0xffff -; CHECKBE-NEXT: .short 0 @ 0x0 -; CHECKBE-NEXT: .short 65535 @ 0xffff -; CHECKBE-NEXT: .short 65535 @ 0xffff entry: %o = or <8 x i16> %i, ret <8 x i16> 
%o -- 2.7.4