From: David Green Date: Sat, 27 Nov 2021 13:21:09 +0000 (+0000) Subject: [ARM] Extra testing for v2i1 types. NFC X-Git-Tag: upstream/15.0.7~24671 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1b2d58ba90cd72a8dfe1535b5fbecbefacc30016;p=platform%2Fupstream%2Fllvm.git [ARM] Extra testing for v2i1 types. NFC This adds extra tests for various operations from making the v2i1 type legal. --- diff --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll index c39d9226bb9e..6127fdd40ea1 100644 --- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll +++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll @@ -1,10 +1,82 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve %s -o - | FileCheck %s +define <2 x i64> @v2i64(i32 %index, i32 %TC, <2 x i64> %V1, <2 x i64> %V2) { +; CHECK-LABEL: v2i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r0 +; CHECK-NEXT: vmov.i64 q1, #0xffffffff +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: vmov q2[2], q2[0], r1, r1 +; CHECK-NEXT: vmov r0, r12, d1 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: adds.w r8, r0, #1 +; CHECK-NEXT: adc lr, r12, #0 +; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: vmov q0[2], q0[0], r12, r8 +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: vand q1, q2, q1 +; CHECK-NEXT: vmov r4, r5, d1 +; CHECK-NEXT: vmov.i32 q2, #0x1 +; CHECK-NEXT: vmov r1, r6, d3 +; CHECK-NEXT: subs r1, r4, r1 +; CHECK-NEXT: sbcs.w r1, r5, r6 +; CHECK-NEXT: vmov r5, r6, d0 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: vmov r0, r1, d2 +; CHECK-NEXT: csetm r9, ne +; CHECK-NEXT: subs r0, r5, r0 +; CHECK-NEXT: sbcs.w r0, r6, r1 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r7, #1 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r9 +; CHECK-NEXT: 
eor.w r0, r4, r8 +; CHECK-NEXT: orrs.w r0, r0, lr +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: teq.w r5, r12 +; CHECK-NEXT: cset r1, ne +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 +; CHECK-NEXT: veor q1, q1, q2 +; CHECK-NEXT: vldr d5, [sp, #32] +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: vmov d4, r2, r3 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: and r1, r1, #1 +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: add r0, sp, #40 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vbic q1, q1, q0 +; CHECK-NEXT: vand q0, q2, q0 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} + %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 %index, i32 %TC) + %select = select <2 x i1> %active.lane.mask, <2 x i64> %V1, <2 x i64> %V2 + ret <2 x i64> %select +} + define <4 x i32> @v4i32(i32 %index, i32 %TC, <4 x i32> %V1, <4 x i32> %V2) { ; CHECK-LABEL: v4i32: ; CHECK: @ %bb.0: -; CHECK-NEXT: adr.w r12, .LCPI0_0 +; CHECK-NEXT: adr.w r12, .LCPI1_0 ; CHECK-NEXT: vdup.32 q1, r0 ; CHECK-NEXT: vldrw.u32 q0, [r12] ; CHECK-NEXT: vadd.i32 q0, q0, r0 @@ -23,7 +95,7 @@ define <4 x i32> @v4i32(i32 %index, i32 %TC, <4 x i32> %V1, <4 x i32> %V2) { ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .LCPI1_0: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 1 @ 0x1 ; CHECK-NEXT: .long 2 @ 0x2 @@ -36,7 +108,7 @@ define <4 x i32> @v4i32(i32 %index, i32 %TC, <4 x i32> %V1, <4 x i32> %V2) { define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) { ; CHECK-LABEL: v7i32: ; CHECK: @ %bb.0: -; CHECK-NEXT: adr r3, .LCPI1_0 +; 
CHECK-NEXT: adr r3, .LCPI2_0 ; CHECK-NEXT: vdup.32 q1, r1 ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: ldr r3, [sp, #32] @@ -57,7 +129,7 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) { ; CHECK-NEXT: ldr r2, [sp, #12] ; CHECK-NEXT: ldr r3, [sp, #4] ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2 -; CHECK-NEXT: adr r2, .LCPI1_1 +; CHECK-NEXT: adr r2, .LCPI2_1 ; CHECK-NEXT: vpsel q2, q3, q2 ; CHECK-NEXT: vstrw.32 q2, [r0] ; CHECK-NEXT: vldrw.u32 q2, [r2] @@ -89,12 +161,12 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) { ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .LCPI2_0: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 1 @ 0x1 ; CHECK-NEXT: .long 2 @ 0x2 ; CHECK-NEXT: .long 3 @ 0x3 -; CHECK-NEXT: .LCPI1_1: +; CHECK-NEXT: .LCPI2_1: ; CHECK-NEXT: .long 4 @ 0x4 ; CHECK-NEXT: .long 5 @ 0x5 ; CHECK-NEXT: .long 6 @ 0x6 @@ -108,7 +180,7 @@ define <8 x i16> @v8i16(i32 %index, i32 %TC, <8 x i16> %V1, <8 x i16> %V2) { ; CHECK-LABEL: v8i16: ; CHECK: @ %bb.0: ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: adr.w r12, .LCPI2_0 +; CHECK-NEXT: adr.w r12, .LCPI3_0 ; CHECK-NEXT: vdup.32 q5, r1 ; CHECK-NEXT: vldrw.u32 q0, [r12] ; CHECK-NEXT: vmov.i8 q1, #0x0 @@ -121,7 +193,7 @@ define <8 x i16> @v8i16(i32 %index, i32 %TC, <8 x i16> %V1, <8 x i16> %V2) { ; CHECK-NEXT: vmov.16 q0[1], r12 ; CHECK-NEXT: vmov r1, r12, d9 ; CHECK-NEXT: vmov.16 q0[2], r1 -; CHECK-NEXT: adr r1, .LCPI2_1 +; CHECK-NEXT: adr r1, .LCPI3_1 ; CHECK-NEXT: vldrw.u32 q4, [r1] ; CHECK-NEXT: vmov.16 q0[3], r12 ; CHECK-NEXT: vadd.i32 q4, q4, r0 @@ -165,12 +237,12 @@ define <8 x i16> @v8i16(i32 %index, i32 %TC, <8 x i16> %V1, <8 x i16> %V2) { ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI2_0: +; CHECK-NEXT: .LCPI3_0: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 1 @ 0x1 ; CHECK-NEXT: .long 2 @ 0x2 ; CHECK-NEXT: .long 3 @ 0x3 -; CHECK-NEXT: 
.LCPI2_1: +; CHECK-NEXT: .LCPI3_1: ; CHECK-NEXT: .long 4 @ 0x4 ; CHECK-NEXT: .long 5 @ 0x5 ; CHECK-NEXT: .long 6 @ 0x6 @@ -185,7 +257,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: adr.w r12, .LCPI3_0 +; CHECK-NEXT: adr.w r12, .LCPI4_0 ; CHECK-NEXT: vdup.32 q7, r1 ; CHECK-NEXT: vldrw.u32 q0, [r12] ; CHECK-NEXT: vmov.i8 q5, #0x0 @@ -198,7 +270,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) { ; CHECK-NEXT: vmov.16 q2[1], r12 ; CHECK-NEXT: vmov r1, r12, d1 ; CHECK-NEXT: vmov.16 q2[2], r1 -; CHECK-NEXT: adr r1, .LCPI3_1 +; CHECK-NEXT: adr r1, .LCPI4_1 ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vmov.16 q2[3], r12 ; CHECK-NEXT: vadd.i32 q3, q0, r0 @@ -228,7 +300,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) { ; CHECK-NEXT: vmov.8 q2[6], r1 ; CHECK-NEXT: vmov.u16 r1, q0[7] ; CHECK-NEXT: vmov.8 q2[7], r1 -; CHECK-NEXT: adr r1, .LCPI3_2 +; CHECK-NEXT: adr r1, .LCPI4_2 ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vadd.i32 q0, q0, r0 ; CHECK-NEXT: vcmp.u32 hi, q7, q0 @@ -239,7 +311,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) { ; CHECK-NEXT: vmov.16 q0[1], r12 ; CHECK-NEXT: vmov r1, r12, d13 ; CHECK-NEXT: vmov.16 q0[2], r1 -; CHECK-NEXT: adr r1, .LCPI3_3 +; CHECK-NEXT: adr r1, .LCPI4_3 ; CHECK-NEXT: vldrw.u32 q6, [r1] ; CHECK-NEXT: vmov.16 q0[3], r12 ; CHECK-NEXT: vadd.i32 q6, q6, r0 @@ -355,22 +427,22 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) { ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI3_0: +; CHECK-NEXT: .LCPI4_0: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 1 @ 0x1 ; CHECK-NEXT: .long 2 @ 0x2 ; CHECK-NEXT: .long 3 @ 0x3 -; CHECK-NEXT: .LCPI3_1: +; CHECK-NEXT: .LCPI4_1: ; CHECK-NEXT: .long 4 @ 0x4 ; CHECK-NEXT: .long 5 @ 0x5 ; 
CHECK-NEXT: .long 6 @ 0x6 ; CHECK-NEXT: .long 7 @ 0x7 -; CHECK-NEXT: .LCPI3_2: +; CHECK-NEXT: .LCPI4_2: ; CHECK-NEXT: .long 8 @ 0x8 ; CHECK-NEXT: .long 9 @ 0x9 ; CHECK-NEXT: .long 10 @ 0xa ; CHECK-NEXT: .long 11 @ 0xb -; CHECK-NEXT: .LCPI3_3: +; CHECK-NEXT: .LCPI4_3: ; CHECK-NEXT: .long 12 @ 0xc ; CHECK-NEXT: .long 13 @ 0xd ; CHECK-NEXT: .long 14 @ 0xe @@ -388,12 +460,12 @@ define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroe ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: beq.w .LBB4_3 +; CHECK-NEXT: beq.w .LBB5_3 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: adds r0, r2, #1 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r2 ; CHECK-NEXT: bic r0, r0, #1 -; CHECK-NEXT: adr r2, .LCPI4_0 +; CHECK-NEXT: adr r2, .LCPI5_0 ; CHECK-NEXT: subs r0, #2 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: vmov.i64 q0, #0xffffffff @@ -401,7 +473,7 @@ define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroe ; CHECK-NEXT: add.w lr, r3, r0, lsr #1 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: vand q1, q1, q0 -; CHECK-NEXT: .LBB4_2: @ %vector.body +; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmov q3[2], q3[0], r12, r12 ; CHECK-NEXT: vmov r6, r7, d3 @@ -474,15 +546,15 @@ define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroe ; CHECK-NEXT: vmovmi r0, s14 ; CHECK-NEXT: strmi r0, [r1, #4] ; CHECK-NEXT: adds r1, #8 -; CHECK-NEXT: le lr, .LBB4_2 -; CHECK-NEXT: .LBB4_3: @ %for.cond.cleanup +; CHECK-NEXT: le lr, .LBB5_2 +; CHECK-NEXT: .LBB5_3: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.4: -; CHECK-NEXT: .LCPI4_0: +; CHECK-NEXT: .LCPI5_0: ; CHECK-NEXT: .long 1 @ 0x1 ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 1 @ 0x1 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll 
b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll index d5dfecba2630..eaceb1dd3f80 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll @@ -418,8 +418,6 @@ entry: ret <4 x float> %s } - - define arm_aapcs_vfpcc <8 x half> @uitofp_v8i1_v8f16(<8 x i16> %src) { ; CHECK-LABEL: uitofp_v8i1_v8f16: ; CHECK: @ %bb.0: @ %entry @@ -475,3 +473,168 @@ entry: %s = select <8 x i1> %0, <8 x half> , <8 x half> zeroinitializer ret <8 x half> %s } + + +define arm_aapcs_vfpcc <2 x double> @uitofp_v2i1_v2f64(<2 x i64> %src) { +; CHECK-LABEL: uitofp_v2i1_v2f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vmov lr, r12, d0 +; CHECK-NEXT: adr r2, .LCPI26_0 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: sbcs.w r0, r3, r1 +; CHECK-NEXT: mov.w r0, #0 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: rsbs.w r1, lr, #0 +; CHECK-NEXT: sbcs.w r1, r3, r12 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 +; CHECK-NEXT: vand q4, q1, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl __aeabi_ul2d +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl __aeabi_ul2d +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI26_0: +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 0 @ 0x0 +entry: + %c = icmp sgt <2 x i64> %src, zeroinitializer + %0 = uitofp <2 x i1> %c to <2 x double> + ret <2 x double> %0 +} + +define arm_aapcs_vfpcc <2 x 
double> @sitofp_v2i1_v2f64(<2 x i64> %src) { +; CHECK-LABEL: sitofp_v2i1_v2f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vmov r2, r12, d1 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: sbcs.w r0, r3, r1 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: rsbs r0, r2, #0 +; CHECK-NEXT: sbcs.w r0, r3, r12 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: bl __aeabi_l2d +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: bl __aeabi_l2d +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, pc} +entry: + %c = icmp sgt <2 x i64> %src, zeroinitializer + %0 = sitofp <2 x i1> %c to <2 x double> + ret <2 x double> %0 +} + +define arm_aapcs_vfpcc <2 x double> @fptoui_v2i1_v2f64(<2 x double> %src) { +; CHECK-LABEL: fptoui_v2i1_v2f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: vmov q1[2], q1[0], r0, r4 +; CHECK-NEXT: adr r2, .LCPI28_0 +; CHECK-NEXT: vmov q1[3], q1[1], r1, r5 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vmov r0, s6 +; CHECK-NEXT: vmov r1, s4 +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 +; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop 
{r4, r5, r7, pc} +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI28_0: +; CHECK-NEXT: .long 0 @ double 1 +; CHECK-NEXT: .long 1072693248 +; CHECK-NEXT: .long 0 @ double 1 +; CHECK-NEXT: .long 1072693248 +entry: + %0 = fptoui <2 x double> %src to <2 x i1> + %s = select <2 x i1> %0, <2 x double> <double 1.0, double 1.0>, <2 x double> zeroinitializer + ret <2 x double> %s +} + +define arm_aapcs_vfpcc <2 x double> @fptosi_v2i1_v2f64(<2 x double> %src) { +; CHECK-LABEL: fptosi_v2i1_v2f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: adr r2, .LCPI29_0 +; CHECK-NEXT: vmov q1[2], q1[0], r0, r4 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vmov q1[3], q1[1], r1, r5 +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI29_0: +; CHECK-NEXT: .long 0 @ double 1 +; CHECK-NEXT: .long 1072693248 +; CHECK-NEXT: .long 0 @ double 1 +; CHECK-NEXT: .long 1072693248 +entry: + %0 = fptosi <2 x double> %src to <2 x i1> + %s = select <2 x i1> %0, <2 x double> <double 1.0, double 1.0>, <2 x double> zeroinitializer + ret <2 x double> %s +} diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll index 477db0718410..dcff285e50d6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll @@ -1,6 +1,36 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +define <2 x i64> @shuffle1_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) { +; 
CHECK-LABEL: shuffle1_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: orrs.w r1, r2, r3 +; CHECK-NEXT: cset r1, eq +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: add r0, sp, #16 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vbic q1, q1, q0 +; CHECK-NEXT: vand q0, q2, q0 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <2 x i64> %src, zeroinitializer + %sh = shufflevector <2 x i1> %c, <2 x i1> undef, <2 x i32> <i32 1, i32 0> + %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %s +} + define <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: shuffle1_v4i32: ; CHECK: @ %bb.0: @ %entry @@ -76,6 +106,36 @@ entry: ret <16 x i8> %s } +define <2 x i64> @shuffle2_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shuffle2_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: orrs r2, r3 +; CHECK-NEXT: cset r2, eq +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r2 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r2 +; CHECK-NEXT: add r0, sp, #16 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vbic q1, q1, q0 +; CHECK-NEXT: vand q0, q2, q0 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <2 x i64> %src, zeroinitializer + %sh = shufflevector <2 x i1> %c, <2 x i1> undef, <2 x i32> <i32 0, i32 1> + %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %s +} + define <4 x i32> 
@shuffle2_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: shuffle2_v4i32: ; CHECK: @ %bb.0: @ %entry @@ -139,6 +199,31 @@ entry: ret <16 x i8> %s } +define <2 x i64> @shuffle3_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shuffle3_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vdup.32 q0, r0 +; CHECK-NEXT: add r0, sp, #16 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vbic q1, q1, q0 +; CHECK-NEXT: vand q0, q2, q0 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <2 x i64> %src, zeroinitializer + %sh = shufflevector <2 x i1> %c, <2 x i1> undef, <2 x i32> <i32 0, i32 0> + %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %s +} + define <4 x i32> @shuffle3_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: shuffle3_v4i32: ; CHECK: @ %bb.0: @ %entry @@ -220,6 +305,31 @@ entry: ret <16 x i8> %s } +define <2 x i64> @shuffle4_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shuffle4_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: orrs.w r0, r2, r3 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vdup.32 q0, r0 +; CHECK-NEXT: add r0, sp, #16 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vbic q1, q1, q0 +; CHECK-NEXT: vand q0, q2, q0 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <2 x i64> %src, zeroinitializer + %sh = shufflevector <2 x i1> %c, <2 x i1> undef, <2 x i32> <i32 1, i32 1> + %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %s +} + define <4 x i32> @shuffle4_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 
shuffle4_v4i32: ; CHECK: @ %bb.0: @ %entry @@ -306,6 +416,66 @@ entry: ret <16 x i8> %s } +define <2 x i64> @shuffle5_b_v2i64(<4 x i32> %src, <2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shuffle5_b_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcmp.i32 eq, q0, zr +; CHECK-NEXT: vmrs r0, p0 +; CHECK-NEXT: ubfx r1, r0, #4, #1 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r1 +; CHECK-NEXT: add r0, sp, #16 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vbic q1, q1, q0 +; CHECK-NEXT: vand q0, q2, q0 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <4 x i32> %src, zeroinitializer + %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <2 x i32> <i32 0, i32 1> + %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %s +} + +define <2 x i64> @shuffle5_t_v2i64(<4 x i32> %src, <2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shuffle5_t_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcmp.i32 eq, q0, zr +; CHECK-NEXT: vmrs r0, p0 +; CHECK-NEXT: ubfx r1, r0, #12, #1 +; CHECK-NEXT: ubfx r0, r0, #8, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r1 +; CHECK-NEXT: add r0, sp, #16 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vbic q1, q1, q0 +; CHECK-NEXT: vand q0, q2, q0 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr +entry: + %c = icmp eq <4 x i32> %src, zeroinitializer + %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <2 x i32> <i32 2, i32 3> + %s = select <2 x i1> %sh, <2 x 
i64> %a, <2 x i64> %b + ret <2 x i64> %s +} + define <4 x i32> @shuffle5_b_v4i32(<8 x i16> %src, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: shuffle5_b_v4i32: ; CHECK: @ %bb.0: @ %entry @@ -450,6 +620,61 @@ entry: ret <8 x i16> %s } +define <4 x i32> @shuffle6_v2i64(<2 x i64> %src1, <2 x i64> %src2, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shuffle6_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: orrs r2, r3 +; CHECK-NEXT: cset r2, eq +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r2 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: bfi r0, r1, #0, #4 +; CHECK-NEXT: and r1, r2, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r0, r1, #4, #4 +; CHECK-NEXT: mov r1, sp +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vmov r1, r2, d0 +; CHECK-NEXT: orrs r1, r2 +; CHECK-NEXT: cset r1, eq +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: and r1, r1, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r0, r1, #8, #4 +; CHECK-NEXT: vmov r1, r2, d1 +; CHECK-NEXT: orrs r1, r2 +; CHECK-NEXT: cset r1, eq +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: and r1, r1, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r0, r1, #12, #4 +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: add r0, sp, #32 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: add r0, sp, #16 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vpsel q0, q1, q0 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr +entry: + %c1 = icmp eq <2 x i64> %src1, zeroinitializer + %c2 = icmp eq <2 x i64> %src2, zeroinitializer + %sh = shufflevector <2 x i1> %c1, <2 x i1> %c2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %s +} + define <8 x i16> @shuffle6_v4i32(<4 x i32> %src1, <4 x i32> 
%src2, <8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: shuffle6_v4i32: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll b/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll index 460e3c09f7e4..30978ee43348 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll @@ -2,10 +2,75 @@ ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-LE ; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-BE +declare arm_aapcs_vfpcc <2 x i64> @ext_i64(<2 x i64> %c) declare arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %c) declare arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %c) declare arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %c) +define arm_aapcs_vfpcc <2 x i64> @shuffle1_v2i64(<2 x i64> %src, <2 x i64> %a) { +; CHECK-LE-LABEL: shuffle1_v2i64: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: .save {r7, lr} +; CHECK-LE-NEXT: push {r7, lr} +; CHECK-LE-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-LE-NEXT: vpush {d8, d9, d10, d11} +; CHECK-LE-NEXT: vmov r0, r1, d1 +; CHECK-LE-NEXT: orrs r0, r1 +; CHECK-LE-NEXT: vmov r1, r2, d0 +; CHECK-LE-NEXT: cset r0, eq +; CHECK-LE-NEXT: cmp r0, #0 +; CHECK-LE-NEXT: csetm r0, ne +; CHECK-LE-NEXT: orrs r1, r2 +; CHECK-LE-NEXT: cset r1, eq +; CHECK-LE-NEXT: cmp r1, #0 +; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: vmov q5[2], q5[0], r1, r0 +; CHECK-LE-NEXT: vmov q5[3], q5[1], r1, r0 +; CHECK-LE-NEXT: vand q4, q1, q5 +; CHECK-LE-NEXT: vmov q0, q4 +; CHECK-LE-NEXT: bl ext_i64 +; CHECK-LE-NEXT: vbic q0, q0, q5 +; CHECK-LE-NEXT: vorr q0, q4, q0 +; CHECK-LE-NEXT: vpop {d8, d9, d10, d11} +; CHECK-LE-NEXT: pop {r7, pc} +; +; CHECK-BE-LABEL: shuffle1_v2i64: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: .save {r7, lr} +; CHECK-BE-NEXT: push {r7, lr} +; CHECK-BE-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-BE-NEXT: vpush {d8, d9, d10, d11} +; 
CHECK-BE-NEXT: vrev64.32 q2, q0 +; CHECK-BE-NEXT: vmov r0, r1, d5 +; CHECK-BE-NEXT: orrs r0, r1 +; CHECK-BE-NEXT: vmov r1, r2, d4 +; CHECK-BE-NEXT: cset r0, eq +; CHECK-BE-NEXT: cmp r0, #0 +; CHECK-BE-NEXT: csetm r0, ne +; CHECK-BE-NEXT: orrs r1, r2 +; CHECK-BE-NEXT: cset r1, eq +; CHECK-BE-NEXT: cmp r1, #0 +; CHECK-BE-NEXT: csetm r1, ne +; CHECK-BE-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-BE-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-BE-NEXT: vrev64.32 q2, q0 +; CHECK-BE-NEXT: vmov.i8 q0, #0xff +; CHECK-BE-NEXT: vand q4, q1, q2 +; CHECK-BE-NEXT: veor q5, q2, q0 +; CHECK-BE-NEXT: vmov q0, q4 +; CHECK-BE-NEXT: bl ext_i64 +; CHECK-BE-NEXT: vand q0, q0, q5 +; CHECK-BE-NEXT: vorr q0, q4, q0 +; CHECK-BE-NEXT: vpop {d8, d9, d10, d11} +; CHECK-BE-NEXT: pop {r7, pc} +entry: + %c = icmp eq <2 x i64> %src, zeroinitializer + %s1 = select <2 x i1> %c, <2 x i64> %a, <2 x i64> zeroinitializer + %ext = call arm_aapcs_vfpcc <2 x i64> @ext_i64(<2 x i64> %s1) + %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %ext + ret <2 x i64> %s +} + define arm_aapcs_vfpcc <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a) { ; CHECK-LE-LABEL: shuffle1_v4i32: ; CHECK-LE: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmp.ll b/llvm/test/CodeGen/Thumb2/mve-vcmp.ll index 34fc2bbb86f3..2b23f9a88b1f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmp.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmp.ll @@ -395,6 +395,41 @@ entry: ret <2 x i64> %s } +define arm_aapcs_vfpcc <2 x i64> @vcmp_slt_v2i64(<2 x i64> %src, <2 x i64> %srcb, <2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: vcmp_slt_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vmov r0, r12, d3 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: sbcs.w r0, r3, r12 +; CHECK-NEXT: vmov lr, r12, d2 +; CHECK-NEXT: vmov r3, r2, d0 +; CHECK-NEXT: mov.w r0, #0 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; 
CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: subs.w r3, r3, lr +; CHECK-NEXT: sbcs.w r2, r2, r12 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: vbic q1, q3, q0 +; CHECK-NEXT: vand q0, q2, q0 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: pop {r7, pc} +entry: + %c = icmp slt <2 x i64> %src, %srcb + %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %s +} + define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, <2 x i64> %srcb, <2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: vcmp_eq_v2i32: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll index ce4756b17b45..f8069f696781 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll @@ -507,3 +507,91 @@ entry: %s = select <4 x i1> %l699, <4 x i32> %a, <4 x i32> %b ret <4 x i32> %s } + +define arm_aapcs_vfpcc <2 x i64> @v2i1and_vmov(<2 x i64> %a, <2 x i64> %b, i32 %c) { +; CHECKBE-LABEL: v2i1and_vmov: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: .vsave {d8, d9} +; CHECKBE-NEXT: vpush {d8, d9} +; CHECKBE-NEXT: cmp r0, #0 +; CHECKBE-NEXT: adr r1, .LCPI37_0 +; CHECKBE-NEXT: cset r0, eq +; CHECKBE-NEXT: vldrw.u32 q3, [r1] +; CHECKBE-NEXT: vmov.32 q4[3], r0 +; CHECKBE-NEXT: rsbs r0, r0, #0 +; CHECKBE-NEXT: vand q3, q4, q3 +; CHECKBE-NEXT: vmov.i8 q2, #0xff +; CHECKBE-NEXT: vmov r1, s15 +; CHECKBE-NEXT: vmov q3[2], q3[0], r0, r1 +; CHECKBE-NEXT: vmov q3[3], q3[1], r0, r1 +; CHECKBE-NEXT: vrev64.32 q4, q3 +; CHECKBE-NEXT: veor q2, q4, q2 +; CHECKBE-NEXT: vand q0, q0, q4 +; CHECKBE-NEXT: vand q1, q1, q2 +; CHECKBE-NEXT: vorr q0, q0, q1 +; CHECKBE-NEXT: vpop {d8, d9} +; CHECKBE-NEXT: bx lr +; CHECKBE-NEXT: .p2align 4 +; CHECKBE-NEXT: @ %bb.1: +; CHECKBE-NEXT: .LCPI37_0: +; CHECKBE-NEXT: .zero 4 +; CHECKBE-NEXT: .long 1 @ 0x1 +; CHECKBE-NEXT: .zero 4 +; 
CHECKBE-NEXT: .long 0 @ 0x0 +entry: + %c1 = icmp eq i32 %c, zeroinitializer + %broadcast.splatinsert1967 = insertelement <2 x i1> undef, i1 %c1, i32 0 + %broadcast.splat1968 = shufflevector <2 x i1> %broadcast.splatinsert1967, <2 x i1> undef, <2 x i32> zeroinitializer + %l699 = and <2 x i1> %broadcast.splat1968, <i1 true, i1 false> + %s = select <2 x i1> %l699, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %s +} + +define arm_aapcs_vfpcc <2 x i64> @v2i1or_vmov(<2 x i64> %a, <2 x i64> %b, i32 %c) { +; CHECKLE-LABEL: v2i1or_vmov: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: cmp r0, #0 +; CHECKLE-NEXT: vldr s8, .LCPI38_0 +; CHECKLE-NEXT: csetm r0, eq +; CHECKLE-NEXT: vmov s10, r0 +; CHECKLE-NEXT: vmov.f32 s9, s8 +; CHECKLE-NEXT: vmov.f32 s11, s10 +; CHECKLE-NEXT: vbic q1, q1, q2 +; CHECKLE-NEXT: vand q0, q0, q2 +; CHECKLE-NEXT: vorr q0, q0, q1 +; CHECKLE-NEXT: bx lr +; CHECKLE-NEXT: .p2align 2 +; CHECKLE-NEXT: @ %bb.1: +; CHECKLE-NEXT: .LCPI38_0: +; CHECKLE-NEXT: .long 0xffffffff @ float NaN +; +; CHECKBE-LABEL: v2i1or_vmov: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: .vsave {d8, d9} +; CHECKBE-NEXT: vpush {d8, d9} +; CHECKBE-NEXT: cmp r0, #0 +; CHECKBE-NEXT: vldr s8, .LCPI38_0 +; CHECKBE-NEXT: csetm r0, eq +; CHECKBE-NEXT: vmov.i8 q3, #0xff +; CHECKBE-NEXT: vmov s10, r0 +; CHECKBE-NEXT: vmov.f32 s9, s8 +; CHECKBE-NEXT: vmov.f32 s11, s10 +; CHECKBE-NEXT: vrev64.32 q4, q2 +; CHECKBE-NEXT: veor q2, q4, q3 +; CHECKBE-NEXT: vand q0, q0, q4 +; CHECKBE-NEXT: vand q1, q1, q2 +; CHECKBE-NEXT: vorr q0, q0, q1 +; CHECKBE-NEXT: vpop {d8, d9} +; CHECKBE-NEXT: bx lr +; CHECKBE-NEXT: .p2align 2 +; CHECKBE-NEXT: @ %bb.1: +; CHECKBE-NEXT: .LCPI38_0: +; CHECKBE-NEXT: .long 0xffffffff @ float NaN +entry: + %c1 = icmp eq i32 %c, zeroinitializer + %broadcast.splatinsert1967 = insertelement <2 x i1> undef, i1 %c1, i32 0 + %broadcast.splat1968 = shufflevector <2 x i1> %broadcast.splatinsert1967, <2 x i1> undef, <2 x i32> zeroinitializer + %l699 = or <2 x i1> %broadcast.splat1968, <i1 true, i1 false> + %s = select <2 x i1> 
%l699, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %s +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vpsel.ll b/llvm/test/CodeGen/Thumb2/mve-vpsel.ll index fa897c5fe9d9..fad1647618fe 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vpsel.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vpsel.ll @@ -37,6 +37,31 @@ entry: ret <4 x i32> %1 } +define arm_aapcs_vfpcc <2 x i64> @vpsel_i64(<2 x i64> %mask, <2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: vpsel_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r0, r1, d1 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: orrs.w r1, r2, r3 +; CHECK-NEXT: cset r1, ne +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: vbic q2, q2, q0 +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: vorr q0, q0, q2 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ne <2 x i64> %mask, zeroinitializer + %1 = select <2 x i1> %0, <2 x i64> %src1, <2 x i64> %src2 + ret <2 x i64> %1 +} + define arm_aapcs_vfpcc <8 x half> @vpsel_f16(<8 x i16> %mask, <8 x half> %src1, <8 x half> %src2) { ; CHECK-LABEL: vpsel_f16: ; CHECK: @ %bb.0: @ %entry @@ -61,6 +86,31 @@ entry: ret <4 x float> %1 } +define arm_aapcs_vfpcc <2 x double> @vpsel_f64(<2 x i64> %mask, <2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: vpsel_f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r0, r1, d1 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: orrs.w r1, r2, r3 +; CHECK-NEXT: cset r1, ne +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: vbic q2, q2, q0 +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: vorr q0, q0, q2 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ne <2 x i64> %mask, zeroinitializer + %1 = 
select <2 x i1> %0, <2 x double> %src1, <2 x double> %src2 + ret <2 x double> %1 +} + define arm_aapcs_vfpcc <4 x i32> @foo(<4 x i32> %vec.ind) { ; CHECK-LABEL: foo: ; CHECK: @ %bb.0: