From b99ef32d041c992d0cb192bdee3e16b9a56de3a9 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Mon, 6 Jan 2020 16:28:18 +0000 Subject: [PATCH] [ARM,MVE] Generate the right instruction for vmaxnmq_m_f16. Summary: Due to a copy-paste error in the isel patterns, the predicated version of this intrinsic was expanding to the `VMAXNMT.F32` instruction instead of `VMAXNMT.F16`. Similarly for vminnm. Reviewers: dmgreen, miyuki, MarkMurrayARM Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D72269 --- llvm/lib/Target/ARM/ARMInstrMVE.td | 4 ++-- llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll | 4 ++-- llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 8945806..3189caf 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1097,7 +1097,7 @@ let Predicates = [HasMVEFloat] in { (v4f32 MQPR:$inactive)))>; def : Pat<(v8f16 (int_arm_mve_max_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))), - (v8f16 (MVE_VMAXNMf32 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), + (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), ARMVCCThen, (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive)))>; } @@ -1117,7 +1117,7 @@ let Predicates = [HasMVEFloat] in { (v4f32 MQPR:$inactive)))>; def : Pat<(v8f16 (int_arm_mve_min_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))), - (v8f16 (MVE_VMINNMf32 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), + (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), ARMVCCThen, (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive)))>; } diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll index 54a1400..c1e45b8 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll @@ -30,7 +30,7 @@ define arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_m_f16(<8 x half> %inactive, <8 x ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmaxnmt.f32 q0, q1, q2 +; CHECK-NEXT: vmaxnmt.f16 q0, q1, q2 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 @@ -66,7 +66,7 @@ define arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_x_f16(<8 x half> %a, <8 x half> ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmaxnmt.f32 q0, q0, q1 +; CHECK-NEXT: vmaxnmt.f16 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll index ae44909..9bbc2a9 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll @@ -30,7 +30,7 @@ define arm_aapcs_vfpcc <8 x half> @test_vminnmq_m_f16(<8 x half> %inactive, <8 x ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst -; CHECK-NEXT: vminnmt.f32 q0, q1, q2 +; CHECK-NEXT: vminnmt.f16 q0, q1, q2 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 @@ -66,7 +66,7 @@ define arm_aapcs_vfpcc <8 x half> @test_vminnmq_x_f16(<8 x half> %a, <8 x half> ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpst -; CHECK-NEXT: vminnmt.f32 q0, q0, q1 +; CHECK-NEXT: vminnmt.f16 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 -- 2.7.4