From d2a9ec29d0f8d4e144a428ef05024a5ba83f15d2 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 4 Jul 2019 08:41:23 +0000 Subject: [PATCH] [ARM] MVE bitwise instruction patterns This adds patterns for the simpler VAND, VORR and VEOR bitwise vector instructions. It also adjusts the top16Zero PatLeaf to not match on vector instructions, which can otherwise cause problems. Code written by David Sherwood. Differential Revision: https://reviews.llvm.org/D63867 llvm-svn: 365113 --- llvm/lib/Target/ARM/ARMInstrMVE.td | 23 ++++++++ llvm/lib/Target/ARM/ARMInstrThumb2.td | 3 +- llvm/test/CodeGen/Thumb2/mve-bitarith.ll | 95 ++++++++++++++++++++++++++++++ llvm/test/CodeGen/Thumb2/mve-div-expand.ll | 2 +- llvm/test/CodeGen/Thumb2/mve-fmath.ll | 2 +- 5 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/mve-bitarith.ll diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 9073b49..e997bae 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1567,6 +1567,29 @@ foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>; } +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), + (v16i8 (MVE_VAND (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; + def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), + (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; + def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), + (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + + def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), + (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; + def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), + (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; + def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), + (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + + def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), + (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; + def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), + (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; + def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), + (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; +} + class MVE_bit_cmode cmode, dag inOps> : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary, iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> { diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 2d22c3e..7cbfaba 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2756,7 +2756,8 @@ def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm), // top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise def top16Zero: PatLeaf<(i32 rGPR:$src), [{ - return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16)); + return !SDValue(N,0)->getValueType(0).isVector() && + CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16)); }]>; // so_imm_notSext is needed instead of so_imm_not, as the value of imm diff --git a/llvm/test/CodeGen/Thumb2/mve-bitarith.ll b/llvm/test/CodeGen/Thumb2/mve-bitarith.ll new file mode 100644 index 0000000..a39c8cb --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-bitarith.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @and_int8_t(<16 x i8> %src1, <16 x i8> %src2) { +; CHECK-LABEL: and_int8_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = and <16 x i8> %src1, %src2 + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @and_int16_t(<8 x i16> %src1, <8 x i16> %src2) { +; CHECK-LABEL: and_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = and <8 x i16> %src1, %src2 + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @and_int32_t(<4 x i32> %src1, <4 x i32> %src2) { +; CHECK-LABEL: and_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = and <4 x i32> %src1, %src2 + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <16 x i8> @or_int8_t(<16 x i8> %src1, <16 x i8> %src2) { +; CHECK-LABEL: or_int8_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = or <16 x i8> %src1, %src2 + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @or_int16_t(<8 x i16> %src1, <8 x i16> %src2) { +; CHECK-LABEL: or_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = or <8 x i16> %src1, %src2 + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @or_int32_t(<4 x i32> %src1, <4 x i32> %src2) { +; CHECK-LABEL: or_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = or <4 x i32> %src1, %src2 + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <16 x i8> @xor_int8_t(<16 x i8> %src1, <16 x i8> %src2) { +; CHECK-LABEL: xor_int8_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: veor q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <16 x i8> %src1, %src2 + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @xor_int16_t(<8 x i16> %src1, <8 x i16> %src2) { +; CHECK-LABEL: xor_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: veor q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <8 x i16> %src1, %src2 + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @xor_int32_t(<4 x i32> %src1, <4 x i32> %src2) { +; CHECK-LABEL: xor_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: veor q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <4 x i32> %src1, %src2 + ret <4 x i32> %0 +} + diff --git a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll index 89361f0..02f2225 100644 --- a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll +++ b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll @@ -870,12 +870,12 @@ define arm_aapcs_vfpcc <8 x half> @frem_f16(<8 x half> %in1, <8 x half> %in2) { ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: .pad #64 ; CHECK-NEXT: sub sp, #64 -; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov.u16 r0, q1[0] ; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: vmov.u16 r0, q4[0] ; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vstr s2, [sp, #56] ; CHECK-NEXT: vcvtb.f32.f16 s0, s0 diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll index 5889043..5ce4e73 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll @@ -1042,12 +1042,12 @@ define arm_aapcs_vfpcc <8 x half> @pow_float16_t(<8 x half> %src1, <8 x half> %s ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: .pad #64 ; CHECK-NEXT: sub sp, #64 -; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov.u16 r0, q1[0] ; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: vmov.u16 r0, q4[0] ; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vstr s2, [sp, #56] ; CHECK-NEXT: vcvtb.f32.f16 s0, s0 -- 2.7.4