From: David Green Date: Tue, 31 Aug 2021 17:19:03 +0000 (+0100) Subject: [ARM] Add missing validForTailPredication for VMINNM/VMAXNM X-Git-Tag: upstream/15.0.7~32628 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=22c384129e95f286953c1019ce29704b475c5bec;p=platform%2Fupstream%2Fllvm.git [ARM] Add missing validForTailPredication for VMINNM/VMAXNM Apparently this was missing, preventing the generation of tail predication loops containing VMINNM, VMAXNM, VMINNMA and VMAXNMA. --- diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 0777532..325025d 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1440,6 +1440,7 @@ class MVE_VMINMAXNM { @@ -4111,6 +4112,7 @@ class MVE_VMAXMINNMAThis Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r3 -; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrwt.u32 q0, [r0], #16 -; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 -; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0], #16 +; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vabs.f32 q0, q0 ; CHECK-NEXT: vminnm.f32 q0, q0, q1 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q0, [r2], #16 -; CHECK-NEXT: le lr, .LBB0_2 +; CHECK-NEXT: vstrw.32 q0, [r2], #16 +; CHECK-NEXT: letp lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: @@ -70,23 +61,14 @@ define float @maxaf32(float* noalias nocapture readonly %s1, float* noalias noca ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB1_1: @ %vector.ph -; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #3 -; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: add.w r12, lr, r12, lsr #2 -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r3 -; CHECK-NEXT: subs r3, #4 -; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrwt.u32 q0, [r1], #16 -; CHECK-NEXT: vldrwt.u32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 +; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vmaxnma.f32 q1, q0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q1, [r2], #16 -; CHECK-NEXT: le lr, .LBB1_2 +; CHECK-NEXT: vstrw.32 q1, [r2], #16 +; CHECK-NEXT: letp lr, .LBB1_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: @@ -131,25 +113,15 @@ define half @maxf16(half* noalias nocapture readonly %s1, half* noalias nocaptur ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB2_1: @ %vector.ph -; CHECK-NEXT: add.w r12, r3, #7 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #7 -; CHECK-NEXT: sub.w r12, r12, #8 -; CHECK-NEXT: add.w r12, lr, r12, lsr #3 -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.16 r3 -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrht.u16 q0, [r0], #16 -; CHECK-NEXT: subs r3, #8 +; CHECK-NEXT: vldrh.u16 q0, [r0], #16 ; CHECK-NEXT: vabs.f16 q0, q0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrht.u16 q1, [r1], #16 +; CHECK-NEXT: vldrh.u16 q1, [r1], #16 ; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r2], #16 -; CHECK-NEXT: le lr, .LBB2_2 +; CHECK-NEXT: vstrh.16 q0, [r2], #16 +; CHECK-NEXT: letp lr, .LBB2_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: @@ -192,23 +164,14 @@ define half @minaf16(half* noalias nocapture readonly %s1, half* noalias nocaptu ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB3_1: @ %vector.ph -; CHECK-NEXT: add.w r12, r3, #7 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #7 -; CHECK-NEXT: sub.w r12, r12, #8 -; CHECK-NEXT: add.w r12, lr, r12, lsr #3 -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.16 r3 -; CHECK-NEXT: subs r3, #8 -; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrht.u16 q0, [r1], #16 -; CHECK-NEXT: vldrht.u16 q1, [r0], #16 +; CHECK-NEXT: vldrh.u16 q0, [r1], #16 +; CHECK-NEXT: vldrh.u16 q1, [r0], #16 ; CHECK-NEXT: vminnma.f16 q1, q0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q1, [r2], #16 -; CHECK-NEXT: le lr, .LBB3_2 +; CHECK-NEXT: vstrh.16 q1, [r2], #16 +; CHECK-NEXT: letp lr, .LBB3_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp index b027794..b9cfd9c 100644 --- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp +++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp @@ -530,6 +530,10 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VMAXu16: case MVE_VMAXu32: case MVE_VMAXu8: + case MVE_VMAXNMf16: + case MVE_VMAXNMf32: + case MVE_VMAXNMAf16: + case MVE_VMAXNMAf32: case MVE_VMINAs16: case MVE_VMINAs32: case MVE_VMINAs8: @@ -539,6 +543,10 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VMINu16: case MVE_VMINu32: case MVE_VMINu8: + case MVE_VMINNMf16: + case MVE_VMINNMf32: + case MVE_VMINNMAf16: + case MVE_VMINNMAf32: case MVE_VMLADAVas16: case MVE_VMLADAVas32: case MVE_VMLADAVas8: