From 02cd8a6b915a9dab32fdd91167f875ce5f67ebd4 Mon Sep 17 00:00:00 2001
From: David Green
Date: Wed, 22 Sep 2021 12:07:52 +0100
Subject: [PATCH] [ARM] Allow smaller VMOVL in tail predicated loops

This allows VMOVL in tail predicated loops so long as the vector size the
VMOVL is extending into is less than or equal to the size of the VCTP in the
tail predicated loop. These cases represent a sign-extend-inreg (or
zero-extend-inreg), which needn't block tail predication as in
https://godbolt.org/z/hdTsEbx8Y.

For this, a VecSize field has been added to the TSFlags bits of MVE
instructions, which stores the size of the elements that the MVE instruction
operates on. In the case of multiple sizes (such as an MVE_VMOVLs8bh, which
extends from i8 to i16), the largest size is chosen. The sizes are encoded as
00 = i8, 01 = i16, 10 = i32 and 11 = i64; these values often (but not always)
come directly from the instruction encoding.

A unit test was added, and although only a subset of the vecsizes are
currently used, the rest should be useful for other cases.

Differential Revision: https://reviews.llvm.org/D109706
---
 llvm/lib/Target/ARM/ARMInstrFormats.td | 2 +
 llvm/lib/Target/ARM/ARMInstrMVE.td | 576 ++++++++--------
 llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 32 +-
 llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 8 +
 llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll | 65 +-
 llvm/unittests/Target/ARM/MachineInstrTest.cpp | 883 +++++++++++++++++++++++++
 6 files changed, 1234 insertions(+), 332 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index b00f974..de35137 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -408,6 +408,7 @@ class InstTemplate VecSize = 0; bit validForTailPredication = 0; bit retainsPreviousHalfElement = 0; bit horizontalReduction = 0;
@@ -428,6 +429,7 @@ class InstTemplate pattern> + string ops, string cstr, bits<2> vecsize, list pattern> : Thumb2XI, Requires<[HasMVEInt]> { let D = MVEDomain; let DecoderNamespace = "MVE"; + let VecSize = vecsize; } // MVE_p is used for most predicated instructions, to add the cluster
@@ -406,22 +407,22 @@ class MVE_MI pattern=[]> + bits<2> vecsize, list pattern=[]> : MVE_MI { + ops, !strconcat(cstr, vpred.vpred_constraint), vecsize, pattern> { let Inst{31-29} = 0b111; let Inst{27-26} = 0b11; } class MVE_f pattern=[]> - : MVE_p { + bits<2> vecsize, list pattern=[]> + : MVE_p { let Predicates = [HasMVEFloat]; }
@@ -599,11 +600,11 @@ def MVE_URSHRL : MVE_ScalarShiftDRegImm<"urshrl", 0b01, 0b1>; class MVE_rDest pattern=[]> + string ops, string cstr, bits<2> vecsize, list pattern=[]> // Always use vpred_n and not vpred_r: with the output register being // a GPR and not a vector register, there can't be any question of // what to put in its inactive lanes.
- : MVE_p { + : MVE_p { let Inst{25-23} = 0b101; let Inst{11-9} = 0b111; @@ -613,7 +614,7 @@ class MVE_rDest size> : MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm), NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src", - []> { + size, []> { bits<4> Qm; bits<4> Qn; bits<4> Rda; @@ -666,7 +667,7 @@ defm MVE_VABAVu32 : MVE_VABAV_m; class MVE_VADDV size, list pattern=[]> : MVE_rDest<(outs tGPREven:$Rda), iops, NoItinerary, - iname, suffix, "$Rda, $Qm", cstr, pattern> { + iname, suffix, "$Rda, $Qm", cstr, size, pattern> { bits<3> Qm; bits<4> Rda; @@ -764,7 +765,7 @@ defm MVE_VADDVu32 : MVE_VADDV_A; class MVE_VADDLV pattern=[]> : MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname, - suffix, "$RdaLo, $RdaHi, $Qm", cstr, pattern> { + suffix, "$RdaLo, $RdaHi, $Qm", cstr, 0b10, pattern> { bits<3> Qm; bits<4> RdaLo; bits<4> RdaHi; @@ -836,7 +837,7 @@ class MVE_VMINMAXNMV pattern=[]> : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary, iname, suffix, "$RdaSrc, $Qm", - "$RdaDest = $RdaSrc", pattern> { + "$RdaDest = $RdaSrc", !if(sz, 0b01, 0b10), pattern> { bits<3> Qm; bits<4> RdaDest; @@ -897,7 +898,7 @@ defm MVE_VMAXNMAV: MVE_VMINMAXNMV_fty<"vmaxnmav", 0, 0, "int_arm_mve_maxnmav">; class MVE_VMINMAXV size, bit bit_17, bit bit_7, list pattern=[]> : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary, - iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", pattern> { + iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", size, pattern> { bits<3> Qm; bits<4> RdaDest; @@ -1020,9 +1021,10 @@ defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 1, "int_arm_mve_minav">; defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0, "int_arm_mve_maxav">; class MVE_VMLAMLSDAV + bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0, + bits<2> vecsize> : MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix, - "$RdaDest, $Qn, $Qm", cstr, []> { + "$RdaDest, $Qn, $Qm", cstr, vecsize, []> { bits<4> RdaDest; bits<3> Qm; bits<3> Qn; @@ -1050,11 +1052,11 @@ multiclass MVE_VMLAMLSDAV_A { def ""#x#VTI.Suffix : MVE_VMLAMLSDAV; + sz, bit_28, 0b0, X, bit_8, bit_0, VTI.Size>; def "a"#x#VTI.Suffix : MVE_VMLAMLSDAV; + sz, bit_28, 0b1, X, bit_8, bit_0, VTI.Size>; let Predicates = [HasMVEInt] in { def : Pat<(i32 (int_arm_mve_vmldava (i32 VTI.Unsigned), @@ -1255,9 +1257,9 @@ foreach acc = ["", "a"] in { // Base class for VMLALDAV and VMLSLDAV, VRMLALDAVH, VRMLSLDAVH class MVE_VMLALDAVBase pattern=[]> + bits<2> vecsize, list pattern=[]> : MVE_rDest<(outs tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest), iops, NoItinerary, - iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, pattern> { + iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, vecsize, pattern> { bits<4> RdaLoDest; bits<4> RdaHiDest; bits<3> Qm; @@ -1285,35 +1287,35 @@ class MVE_VMLALDAVBase pattern=[]> { + bit sz, bit bit_28, bit X, bit bit_8, bit bit_0, + bits<2> vecsize, list pattern=[]> { def ""#x#suffix : MVE_VMLALDAVBase< iname # x, suffix, (ins MQPR:$Qn, MQPR:$Qm), "", - sz, bit_28, 0b0, X, bit_8, bit_0, pattern>; + sz, bit_28, 0b0, X, bit_8, bit_0, vecsize, pattern>; def "a"#x#suffix : MVE_VMLALDAVBase< iname # "a" # x, suffix, (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, MQPR:$Qn, MQPR:$Qm), "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc", - sz, bit_28, 0b1, X, bit_8, bit_0, pattern>; + sz, bit_28, 0b1, X, bit_8, bit_0, vecsize, pattern>; } multiclass MVE_VMLALDAVBase_AX pattern=[]> { + bit bit_8, bit bit_0, bits<2> vecsize, list pattern=[]> { defm "" : 
MVE_VMLALDAVBase_A; + bit_28, 0b0, bit_8, bit_0, vecsize, pattern>; defm "" : MVE_VMLALDAVBase_A; + bit_28, 0b1, bit_8, bit_0, vecsize, pattern>; } -multiclass MVE_VRMLALDAVH_multi pattern=[]> { - defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#suffix, - 0b0, 0b0, 0b1, 0b0, pattern>; - defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#suffix, - 0b0, 0b1, 0b0, 0b1, 0b0, pattern>; +multiclass MVE_VRMLALDAVH_multi pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#VTI.BitsSuffix, + 0b0, 0b0, 0b1, 0b0, VTI.Size, pattern>; + defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#VTI.BitsSuffix, + 0b0, 0b1, 0b0, 0b1, 0b0, VTI.Size, pattern>; } -defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<"32">; +defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi; // vrmlalvh aliases for vrmlaldavh def : MVEInstAlias<"vrmlalvh${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm", @@ -1333,14 +1335,15 @@ def : MVEInstAlias<"vrmlalvha${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm", tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; -multiclass MVE_VMLALDAV_multi pattern=[]> { - defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#suffix, sz, 0b0, 0b0, 0b0, pattern>; - defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#suffix, - sz, 0b1, 0b0, 0b0, 0b0, pattern>; +multiclass MVE_VMLALDAV_multi pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#VTI.BitsSuffix, + VTI.Size{1}, 0b0, 0b0, 0b0, VTI.Size, pattern>; + defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#VTI.BitsSuffix, + VTI.Size{1}, 0b1, 0b0, 0b0, 0b0, VTI.Size, pattern>; } -defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>; -defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>; +defm MVE_VMLALDAV : MVE_VMLALDAV_multi; +defm MVE_VMLALDAV : MVE_VMLALDAV_multi; let Predicates = [HasMVEInt] in { def : Pat<(ARMVMLALVs (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)), @@ -1393,22 +1396,22 @@ foreach acc = ["", "a"] in { } multiclass MVE_VMLSLDAV_multi pattern=[]> { - defm "" : MVE_VMLALDAVBase_AX; + bit bit_28, bits<2> vecsize, list pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX; } -defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>; -defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>; -defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>; +defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0, 0b01>; +defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0, 0b10>; +defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1, 0b10>; // end of mve_rDest instructions // start of mve_comp instructions class MVE_comp pattern=[]> + string cstr, bits<2> vecsize, list pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), itin, iname, suffix, - "$Qd, $Qn, $Qm", vpred_r, cstr, pattern> { + "$Qd, $Qn, $Qm", vpred_r, cstr, vecsize, pattern> { bits<4> Qd; bits<4> Qn; bits<4> Qm; @@ -1425,15 +1428,15 @@ class MVE_comp sz, bit bit_21, list pattern=[]> - : MVE_comp { + : MVE_comp { let Inst{28} = 0b1; let Inst{25-24} = 0b11; let Inst{23} = 0b0; let Inst{21} = bit_21; - let Inst{20} = sz; + let Inst{20} = sz{0}; let Inst{11} = 0b1; let Inst{8} = 0b1; let Inst{6} = 0b1; @@ -1444,7 +1447,7 @@ class MVE_VMINMAXNM { - def "" : MVE_VMINMAXNM; + def "" : MVE_VMINMAXNM; let Predicates = [HasMVEFloat] in { defm : MVE_TwoOpPattern(NAME)>; @@ -1459,7 +1462,7 @@ defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_ class MVE_VMINMAX size, bit bit_4, list pattern=[]> - : MVE_comp { + : MVE_comp { let Inst{28} = U; let Inst{25-24} = 0b11; @@ -1505,8 +1508,8 @@ defm MVE_VMAXu32 : MVE_VMAX; // 
start of mve_bit instructions class MVE_bit_arith pattern=[]> - : MVE_p { + string ops, string cstr, bits<2> vecsize, list pattern=[]> + : MVE_p { bits<4> Qd; bits<4> Qm; @@ -1517,7 +1520,7 @@ class MVE_bit_arith { + "vbic", "", "$Qd, $Qn, $Qm", "", 0b00> { bits<4> Qn; let Inst{28} = 0b0; @@ -1533,9 +1536,10 @@ def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), let validForTailPredication = 1; } -class MVE_VREV size, bits<2> bit_8_7, string cstr=""> +class MVE_VREV size, bits<2> bit_8_7, + bits<2> vecsize, string cstr=""> : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname, - suffix, "$Qd, $Qm", cstr> { + suffix, "$Qd, $Qm", cstr, vecsize> { let Inst{28} = 0b1; let Inst{25-23} = 0b111; @@ -1549,14 +1553,14 @@ class MVE_VREV size, bits<2> bit_8_7, strin let Inst{0} = 0b0; } -def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, "@earlyclobber $Qd">; -def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, "@earlyclobber $Qd">; -def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, "@earlyclobber $Qd">; +def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, 0b11, "@earlyclobber $Qd">; +def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, 0b11, "@earlyclobber $Qd">; +def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, 0b11, "@earlyclobber $Qd">; -def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01>; -def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>; +def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01, 0b10>; +def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01, 0b10>; -def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10>; +def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10, 0b01>; let Predicates = [HasMVEInt] in { def : Pat<(v8i16 (bswap (v8i16 MQPR:$src))), @@ -1591,7 +1595,7 @@ let Predicates = [HasMVEInt] in { } def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), - "vmvn", "", "$Qd, $Qm", ""> { + "vmvn", "", "$Qd, $Qm", "", 0b00> { let Inst{28} = 0b1; let Inst{25-23} = 0b111; let Inst{21-16} = 0b110000; @@ -1614,7 +1618,7 @@ let Predicates = [HasMVEInt] in { class MVE_bit_ops bit_21_20, bit bit_28> : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), - iname, "", "$Qd, $Qn, $Qm", ""> { + iname, "", "$Qd, $Qn, $Qm", "", 0b00> { bits<4> Qn; let Inst{28} = bit_28; @@ -1685,9 +1689,9 @@ let Predicates = [HasMVEInt] in { int_arm_mve_orn_predicated, (? 
), MVE_VORN>; } -class MVE_bit_cmode +class MVE_bit_cmode vecsize> : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary, - iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> { + iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src", vecsize> { bits<12> imm; bits<4> Qd; @@ -1710,7 +1714,7 @@ class MVE_bit_cmode multiclass MVE_bit_cmode_p { def "" : MVE_bit_cmode { + (ins MQPR:$Qd_src, imm_type:$imm), VTI.Size> { let Inst{5} = opcode; let validForTailPredication = 1; } @@ -1802,6 +1806,7 @@ class MVE_VMOV_lane_32 let Inst{16} = Idx{1}; let Inst{21} = Idx{0}; + let VecSize = 0b10; let Predicates = [HasFPRegsV8_1M]; } @@ -1813,6 +1818,8 @@ class MVE_VMOV_lane_16 let Inst{16} = Idx{2}; let Inst{21} = Idx{1}; let Inst{6} = Idx{0}; + + let VecSize = 0b01; } class MVE_VMOV_lane_8 @@ -1823,6 +1830,8 @@ class MVE_VMOV_lane_8 let Inst{21} = Idx{2}; let Inst{6} = Idx{1}; let Inst{5} = Idx{0}; + + let VecSize = 0b00; } def MVE_VMOV_from_lane_32 : MVE_VMOV_lane_32< MVE_VMOV_from_lane>; @@ -1933,7 +1942,7 @@ let Predicates = [HasMVEInt] in { class MVE_int size, list pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary, - iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", pattern> { + iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", size, pattern> { bits<4> Qd; bits<4> Qn; bits<4> Qm; @@ -2351,9 +2360,9 @@ defm MVE_VHSUBu8 : MVE_VHSUB; defm MVE_VHSUBu16 : MVE_VHSUB; defm MVE_VHSUBu32 : MVE_VHSUB; -class MVE_VDUP pattern=[]> +class MVE_VDUP vecsize, list pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary, - "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> { + "vdup", suffix, "$Qd, $Rt", vpred_r, "", vecsize, pattern> { bits<4> Qd; bits<4> Rt; @@ -2372,9 +2381,9 @@ class MVE_VDUP pattern=[]> let validForTailPredication = 1; } -def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>; -def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1>; -def MVE_VDUP8 : MVE_VDUP<"8", 0b1, 0b0>; +def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0, 0b10>; +def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1, 0b01>; +def MVE_VDUP8 : MVE_VDUP<"8", 0b1, 0b0, 0b00>; let Predicates = [HasMVEInt] in { def : Pat<(v16i8 (ARMvdup (i32 rGPR:$elem))), @@ -2421,7 +2430,7 @@ let Predicates = [HasMVEInt] in { class MVEIntSingleSrc size, list pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm), NoItinerary, - iname, suffix, "$Qd, $Qm", vpred_r, "", pattern> { + iname, suffix, "$Qd, $Qm", vpred_r, "", size, pattern> { bits<4> Qd; bits<4> Qm; @@ -2566,9 +2575,9 @@ defm MVE_VQABSNEG_Ps32 : vqabsneg_pattern; class MVE_mod_imm cmode, bit op, - dag iops, list pattern=[]> + dag iops, bits<2> vecsize, list pattern=[]> : MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm", - vpred_r, "", pattern> { + vpred_r, "", vecsize, pattern> { bits<13> imm; bits<4> Qd; @@ -2591,21 +2600,21 @@ class MVE_mod_imm cmode, bit op, let isReMaterializable = 1 in { let isAsCheapAsAMove = 1 in { -def MVE_VMOVimmi8 : MVE_mod_imm<"vmov", "i8", {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm)>; -def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm)> { +def MVE_VMOVimmi8 : MVE_mod_imm<"vmov", "i8", {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm), 0b00>; +def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm), 0b01> { let Inst{9} = imm{9}; } -def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm)> { +def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm), 0b10> { let Inst{11-8} = imm{11-8}; } -def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins 
nImmSplatI64:$imm)>; -def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm)>; +def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm), 0b11>; +def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm), 0b10>; } // let isAsCheapAsAMove = 1 -def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm)> { +def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm), 0b01> { let Inst{9} = imm{9}; } -def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm)> { +def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm), 0b10> { let Inst{11-8} = imm{11-8}; } } // let isReMaterializable = 1 @@ -2642,7 +2651,7 @@ class MVE_VMINMAXA size, bit bit_12, list pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm), NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src", - pattern> { + size, pattern> { bits<4> Qd; bits<4> Qm; @@ -2701,7 +2710,7 @@ defm MVE_VMAXAs32 : MVE_VMAXA; def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd), (ins MQPR:$QdSrc, rGPR:$RdmSrc, long_shift:$imm), NoItinerary, "vshlc", "", "$QdSrc, $RdmSrc, $imm", - vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc"> { + vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc", 0b10> { bits<5> imm; bits<4> Qd; bits<4> RdmDest; @@ -2718,8 +2727,8 @@ def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd), class MVE_shift_imm pattern=[]> - : MVE_p { + bits<2> vecsize, list pattern=[]> + : MVE_p { bits<4> Qd; bits<4> Qm; @@ -2733,7 +2742,7 @@ class MVE_VMOVL sz, bit U, bit top, list pattern=[]> : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm), iname, suffix, "$Qd, $Qm", vpred_r, "", - pattern> { + sz, pattern> { let Inst{28} = U; let Inst{25-23} = 0b101; let Inst{21} = 0b1; @@ -2799,9 +2808,9 @@ let Predicates = [HasMVEInt] in { class MVE_VSHLL_imm pattern=[]> + Operand immtype, bits<2> vecsize, list pattern=[]> : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm, immtype:$imm), - iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> { + iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", vecsize, pattern> { let Inst{28} = U; let Inst{25-23} = 0b101; let Inst{21} = 0b1; @@ -2822,7 +2831,7 @@ class MVE_VSHLL_imm pattern=[]> - : MVE_VSHLL_imm { + : MVE_VSHLL_imm { bits<3> imm; let Inst{20-19} = 0b01; let Inst{18-16} = imm; @@ -2830,7 +2839,7 @@ class MVE_VSHLL_imm8 pattern=[]> - : MVE_VSHLL_imm { + : MVE_VSHLL_imm { bits<4> imm; let Inst{20} = 0b1; let Inst{19-16} = imm; @@ -2848,7 +2857,7 @@ def MVE_VSHLL_immu16th : MVE_VSHLL_imm16<"vshllt", "u16", 0b1, 0b1>; class MVE_VSHLL_by_lane_width size, bit U, string ops, list pattern=[]> : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm), - iname, suffix, ops, vpred_r, "", pattern> { + iname, suffix, ops, vpred_r, "", !if(size, 0b10, 0b01), pattern> { let Inst{28} = U; let Inst{25-23} = 0b100; let Inst{21-20} = 0b11; @@ -2910,15 +2919,15 @@ foreach VTI = [MVE_v16s8, MVE_v8s16, MVE_v16u8, MVE_v8u16] in foreach top = [0, 1] in defm : MVE_VSHLL_patterns; -class MVE_shift_imm_partial +class MVE_shift_imm_partial vecsize> : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$QdSrc, MQPR:$Qm, imm:$imm), - iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc"> { + iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc", vecsize> { Operand immediateType = imm; } class MVE_VxSHRN - : MVE_shift_imm_partial { + Operand imm, bits<2> vecsize> + : MVE_shift_imm_partial { bits<5> imm; let Inst{28} = bit_28; @@ -2933,35 +2942,35 @@ 
class MVE_VxSHRN { +def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } -def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8> { +def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } -def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16> { +def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16, 0b10> { let Inst{20} = 0b1; } -def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16> { +def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16, 0b10> { let Inst{20} = 0b1; } -def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8> { +def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } -def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8> { +def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } -def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16> { +def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16, 0b10> { let Inst{20} = 0b1; } -def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16> { +def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16, 0b10> { let Inst{20} = 0b1; } class MVE_VxQRSHRUN - : MVE_shift_imm_partial { + Operand imm, bits<2> vecsize> + : MVE_shift_imm_partial { bits<5> imm; let Inst{28} = bit_28; @@ -2977,42 +2986,42 @@ class MVE_VxQRSHRUN { + "vqrshrunb", "s16", 0b1, 0b0, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN< - "vqrshrunt", "s16", 0b1, 0b1, shr_imm8> { + "vqrshrunt", "s16", 0b1, 0b1, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN< - "vqrshrunb", "s32", 0b1, 0b0, shr_imm16> { + "vqrshrunb", "s32", 0b1, 0b0, shr_imm16, 0b10> { let Inst{20} = 0b1; } def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN< - "vqrshrunt", "s32", 0b1, 0b1, shr_imm16> { + "vqrshrunt", "s32", 0b1, 0b1, shr_imm16, 0b10> { let Inst{20} = 0b1; } def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN< - "vqshrunb", "s16", 0b0, 0b0, shr_imm8> { + "vqshrunb", "s16", 0b0, 0b0, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } def MVE_VQSHRUNs16th : MVE_VxQRSHRUN< - "vqshrunt", "s16", 0b0, 0b1, shr_imm8> { + "vqshrunt", "s16", 0b0, 0b1, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN< - "vqshrunb", "s32", 0b0, 0b0, shr_imm16> { + "vqshrunb", "s32", 0b0, 0b0, shr_imm16, 0b10> { let Inst{20} = 0b1; } def MVE_VQSHRUNs32th : MVE_VxQRSHRUN< - "vqshrunt", "s32", 0b0, 0b1, shr_imm16> { + "vqshrunt", "s32", 0b0, 0b1, shr_imm16, 0b10> { let Inst{20} = 0b1; } class MVE_VxQRSHRN - : MVE_shift_imm_partial { + Operand imm, bits<2> vecsize> + : MVE_shift_imm_partial { bits<5> imm; let Inst{25-23} = 0b101; @@ -3027,19 +3036,19 @@ class MVE_VxQRSHRN { - def s16 : MVE_VxQRSHRN { + def s16 : MVE_VxQRSHRN { let Inst{28} = 0b0; let Inst{20-19} = 0b01; } - def u16 : MVE_VxQRSHRN { + def u16 : MVE_VxQRSHRN { let Inst{28} = 0b1; let Inst{20-19} = 0b01; } - def s32 : MVE_VxQRSHRN { + def s32 : MVE_VxQRSHRN { let Inst{28} = 0b0; let Inst{20} = 0b1; } - def u32 : MVE_VxQRSHRN { + def u32 : MVE_VxQRSHRN { let Inst{28} = 0b1; let Inst{20} = 0b1; } @@ -3114,7 +3123,7 @@ defm : MVE_VSHRN_patterns; class MVE_shift_by_vec size, bit bit_4, bit bit_8> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm, MQPR:$Qn), NoItinerary, - iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", []> { + iname, suffix, "$Qd, $Qm, $Qn", 
vpred_r, "", size, []> { // Shift instructions which take a vector of shift counts bits<4> Qd; bits<4> Qm; @@ -3189,8 +3198,8 @@ let Predicates = [HasMVEInt] in { class MVE_shift_with_imm pattern=[]> - : MVE_p { + bits<2> vecsize, list pattern=[]> + : MVE_p { bits<4> Qd; bits<4> Qm; @@ -3213,10 +3222,10 @@ class MVE_shift_with_imm +class MVE_VSxI_imm vecsize> : MVE_shift_with_imm { + "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src", vecsize> { bits<6> imm; let Inst{28} = 0b1; let Inst{25-24} = 0b11; @@ -3228,27 +3237,27 @@ class MVE_VSxI_imm Operand immediateType = immType; } -def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8> { +def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8, 0b00> { let Inst{21-19} = 0b001; } -def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16> { +def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16, 0b01> { let Inst{21-20} = 0b01; } -def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32> { +def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32, 0b10> { let Inst{21} = 0b1; } -def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7> { +def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7, 0b00> { let Inst{21-19} = 0b001; } -def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15> { +def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15, 0b01> { let Inst{21-20} = 0b01; } -def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,imm0_31> { +def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,imm0_31, 0b10> { let Inst{21} = 0b1; } @@ -3277,7 +3286,7 @@ defm : MVE_VSxI_patterns; class MVE_VQSHL_imm : MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd), (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm", - vpred_r, ""> { + vpred_r, "", VTI_.Size> { bits<6> imm; let Inst{28} = VTI_.Unsigned; @@ -3317,7 +3326,7 @@ let unpred_int = int_arm_mve_vqshl_imm, class MVE_VQSHLU_imm : MVE_shift_with_imm<"vqshlu", VTI_.Suffix, (outs MQPR:$Qd), (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm", - vpred_r, ""> { + vpred_r, "", VTI_.Size> { bits<6> imm; let Inst{28} = 0b1; @@ -3347,7 +3356,7 @@ let unpred_int = int_arm_mve_vqshlu_imm, class MVE_VRSHR_imm : MVE_shift_with_imm<"vrshr", VTI_.Suffix, (outs MQPR:$Qd), (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm", - vpred_r, ""> { + vpred_r, "", VTI_.Size> { bits<6> imm; let Inst{28} = VTI_.Unsigned; @@ -3421,10 +3430,10 @@ defm : MVE_shift_imm_patterns; defm : MVE_shift_imm_patterns; defm : MVE_shift_imm_patterns; -class MVE_VSHR_imm +class MVE_VSHR_imm vecsize> : MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd), !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm", - vpred_r, ""> { + vpred_r, "", vecsize> { bits<6> imm; let Inst{25-24} = 0b11; @@ -3432,40 +3441,40 @@ class MVE_VSHR_imm let Inst{10-8} = 0b000; } -def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm)> { +def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm), 0b00> { let Inst{28} = 0b0; let Inst{21-19} = 0b001; } -def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm)> { +def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm), 0b00> { let Inst{28} = 0b1; let Inst{21-19} = 0b001; } -def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm)> { +def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm), 0b01> { let Inst{28} = 0b0; let Inst{21-20} = 0b01; } -def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm)> { +def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm), 0b01> { let Inst{28} = 0b1; let Inst{21-20} = 0b01; } -def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm)> 
{ +def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm), 0b10> { let Inst{28} = 0b0; let Inst{21} = 0b1; } -def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm)> { +def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm), 0b10> { let Inst{28} = 0b1; let Inst{21} = 0b1; } -class MVE_VSHL_imm +class MVE_VSHL_imm vecsize> : MVE_shift_with_imm<"vshl", suffix, (outs MQPR:$Qd), !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm", - vpred_r, ""> { + vpred_r, "", vecsize> { bits<6> imm; let Inst{28} = 0b0; @@ -3474,15 +3483,15 @@ class MVE_VSHL_imm let Inst{10-8} = 0b101; } -def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm)> { +def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm), 0b00> { let Inst{21-19} = 0b001; } -def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm)> { +def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm), 0b01> { let Inst{21-20} = 0b01; } -def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> { +def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm), 0b10> { let Inst{21} = 0b1; } @@ -3526,8 +3535,8 @@ let Predicates = [HasMVEInt] in { // start of MVE Floating Point instructions class MVE_float pattern=[]> - : MVE_f { + vpred_ops vpred, string cstr, bits<2> vecsize, list pattern=[]> + : MVE_f { bits<4> Qm; let Inst{12} = 0b0; @@ -3540,7 +3549,7 @@ class MVE_float op, string suffix, bits<2> size, list pattern=[]> : MVE_float { + (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> { bits<4> Qd; let Inst{28} = 0b1; @@ -3587,16 +3596,16 @@ defm MVE_VRINTf32 : MVE_VRINT_ops; class MVEFloatArithNeon pattern=[]> - : MVE_float { + vpred_ops vpred, string cstr, bits<2> vecsize, list pattern=[]> + : MVE_float { let Inst{20} = size; let Inst{16} = 0b0; } -class MVE_VMUL_fp pattern=[]> - : MVEFloatArithNeon size, list pattern=[]> + : MVEFloatArithNeon { + size, pattern> { bits<4> Qd; bits<4> Qn; @@ -3614,7 +3623,7 @@ class MVE_VMUL_fp pattern=[]> multiclass MVE_VMULT_fp_m { - def "" : MVE_VMUL_fp; + def "" : MVE_VMUL_fp; defvar Inst = !cast(NAME); let Predicates = [HasMVEFloat] in { @@ -3628,10 +3637,10 @@ multiclass MVE_VMUL_fp_m defm MVE_VMULf32 : MVE_VMUL_fp_m; defm MVE_VMULf16 : MVE_VMUL_fp_m; -class MVE_VCMLA - : MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd), +class MVE_VCMLA size> + : MVEFloatArithNeon<"vcmla", suffix, size{1}, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot), - "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", []> { + "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", size, []> { bits<4> Qd; bits<4> Qn; bits<2> rot; @@ -3648,8 +3657,8 @@ class MVE_VCMLA let Inst{4} = 0b0; } -multiclass MVE_VCMLA_m { - def "" : MVE_VCMLA; +multiclass MVE_VCMLA_m { + def "" : MVE_VCMLA; defvar Inst = !cast(NAME); let Predicates = [HasMVEFloat] in { @@ -3671,16 +3680,16 @@ multiclass MVE_VCMLA_m { } } -defm MVE_VCMLAf16 : MVE_VCMLA_m; -defm MVE_VCMLAf32 : MVE_VCMLA_m; +defm MVE_VCMLAf16 : MVE_VCMLA_m; +defm MVE_VCMLAf32 : MVE_VCMLA_m; -class MVE_VADDSUBFMA_fp size, bit bit_4, bit bit_8, bit bit_21, dag iops=(ins), vpred_ops vpred=vpred_r, string cstr="", list pattern=[]> - : MVEFloatArithNeon { + vpred, cstr, size, pattern> { bits<4> Qd; bits<4> Qn; @@ -3698,7 +3707,7 @@ class MVE_VADDSUBFMA_fp { - def "" : MVE_VADDSUBFMA_fp; defvar Inst = !cast(NAME); defvar pred_int = int_arm_mve_fma_predicated; @@ -3739,7 +3748,7 @@ defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>; multiclass MVE_VADDSUB_fp_m { - def "" : MVE_VADDSUBFMA_fp { + def "" : MVE_VADDSUBFMA_fp { let 
validForTailPredication = 1; } defvar Inst = !cast(NAME); @@ -3760,10 +3769,10 @@ defm MVE_VADDf16 : MVE_VADD_fp_m; defm MVE_VSUBf32 : MVE_VSUB_fp_m; defm MVE_VSUBf16 : MVE_VSUB_fp_m; -class MVE_VCADD - : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd), +class MVE_VCADD size, string cstr=""> + : MVEFloatArithNeon<"vcadd", suffix, size{1}, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot), - "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, size, []> { bits<4> Qd; bits<4> Qn; bit rot; @@ -3781,8 +3790,8 @@ class MVE_VCADD let Inst{4} = 0b0; } -multiclass MVE_VCADD_m { - def "" : MVE_VCADD; +multiclass MVE_VCADD_m { + def "" : MVE_VCADD; defvar Inst = !cast(NAME); let Predicates = [HasMVEFloat] in { @@ -3802,12 +3811,12 @@ multiclass MVE_VCADD_m { } } -defm MVE_VCADDf16 : MVE_VCADD_m; -defm MVE_VCADDf32 : MVE_VCADD_m; +defm MVE_VCADDf16 : MVE_VCADD_m; +defm MVE_VCADDf32 : MVE_VCADD_m; -class MVE_VABD_fp +class MVE_VABD_fp size> : MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), - "$Qd, $Qn, $Qm", vpred_r, ""> { + "$Qd, $Qn, $Qm", vpred_r, "", size> { bits<4> Qd; bits<4> Qn; @@ -3815,7 +3824,7 @@ class MVE_VABD_fp let Inst{25-23} = 0b110; let Inst{22} = Qd{3}; let Inst{21} = 0b1; - let Inst{20} = size; + let Inst{20} = size{0}; let Inst{19-17} = Qn{2-0}; let Inst{16} = 0b0; let Inst{15-13} = Qd{2-0}; @@ -3827,7 +3836,7 @@ class MVE_VABD_fp multiclass MVE_VABDT_fp_m { - def "" : MVE_VABD_fp; + def "" : MVE_VABD_fp; defvar Inst = !cast(NAME); let Predicates = [HasMVEFloat] in { @@ -3847,7 +3856,7 @@ multiclass MVE_VABD_fp_m : MVE_VABDT_fp_m; defm MVE_VABDf32 : MVE_VABD_fp_m; -defm MVE_VABDf16 : MVE_VABD_fp_m; +defm MVE_VABDf16 : MVE_VABD_fp_m; let Predicates = [HasMVEFloat] in { def : Pat<(v8f16 (fabs (fsub (v8f16 MQPR:$Qm), (v8f16 MQPR:$Qn)))), @@ -3860,7 +3869,7 @@ class MVE_VCVT_fix : MVE_float<"vcvt", suffix, (outs MQPR:$Qd), (ins MQPR:$Qm, imm_operand_type:$imm6), - "$Qd, $Qm, $imm6", vpred_r, "", []> { + "$Qd, $Qm, $imm6", vpred_r, "", !if(fsi, 0b10, 0b01), []> { bits<4> Qd; bits<6> imm6; @@ -3943,7 +3952,7 @@ defm MVE_VCVTu32f32_fix : MVE_VCVT_fix_f32_m<0b1, 0b1, MVE_v4u32, MVE_v4f32>; class MVE_VCVT_fp_int_anpm size, bit op, string anpm, bits<2> rm, list pattern=[]> : MVE_float { + (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> { bits<4> Qd; let Inst{28} = 0b1; @@ -4000,7 +4009,7 @@ defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_outer; class MVE_VCVT_fp_int size, bit toint, bit unsigned, list pattern=[]> : MVE_float<"vcvt", suffix, (outs MQPR:$Qd), - (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> { + (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> { bits<4> Qd; let Inst{28} = 0b1; @@ -4063,7 +4072,7 @@ let Predicates = [HasMVEFloat] in { class MVE_VABSNEG_fp size, bit negate, list pattern=[]> : MVE_float { + (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> { bits<4> Qd; let Inst{28} = 0b1; @@ -4102,15 +4111,15 @@ defm MVE_VNEGf16 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated, defm MVE_VNEGf32 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated, MVE_v4f32, 1>; -class MVE_VMAXMINNMA size, bit bit_12, list pattern=[]> : MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm), NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src", - pattern> { + size, pattern> { bits<4> Qd; bits<4> Qm; - let Inst{28} = size; + let Inst{28} = size{0}; let Inst{25-23} = 0b100; let Inst{22} = Qd{3}; let Inst{21-16} = 0b111111; @@ -4129,7 +4138,7 @@ class MVE_VMAXMINNMA { - def "" 
: MVE_VMAXMINNMA; + def "" : MVE_VMAXMINNMA; defvar Inst = !cast(NAME); let Predicates = [HasMVEInt] in { @@ -4163,9 +4172,9 @@ defm MVE_VMINNMAf16 : MVE_VMINNMA; // start of MVE compares class MVE_VCMPqq bits_21_20, - VCMPPredicateOperand predtype, list pattern=[]> + VCMPPredicateOperand predtype, bits<2> vecsize, list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, MQPR:$Qm, predtype:$fc), - NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", pattern> { + NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", vecsize, pattern> { // Base class for comparing two vector registers bits<3> fc; bits<4> Qn; @@ -4200,24 +4209,24 @@ class MVE_VCMPqq bits_21_20, } class MVE_VCMPqqf - : MVE_VCMPqq { + : MVE_VCMPqq { let Predicates = [HasMVEFloat]; } class MVE_VCMPqqi size> - : MVE_VCMPqq { + : MVE_VCMPqq { let Inst{12} = 0b0; let Inst{0} = 0b0; } class MVE_VCMPqqu size> - : MVE_VCMPqq { + : MVE_VCMPqq { let Inst{12} = 0b0; let Inst{0} = 0b1; } class MVE_VCMPqqs size> - : MVE_VCMPqq { + : MVE_VCMPqq { let Inst{12} = 0b1; } @@ -4237,9 +4246,9 @@ def MVE_VCMPs16 : MVE_VCMPqqs<"s16", 0b01>; def MVE_VCMPs32 : MVE_VCMPqqs<"s32", 0b10>; class MVE_VCMPqr bits_21_20, - VCMPPredicateOperand predtype, list pattern=[]> + VCMPPredicateOperand predtype, bits<2> vecsize, list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, GPRwithZR:$Rm, predtype:$fc), - NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", pattern> { + NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", vecsize, pattern> { // Base class for comparing a vector register with a scalar bits<3> fc; bits<4> Qn; @@ -4265,24 +4274,24 @@ class MVE_VCMPqr bits_21_20, } class MVE_VCMPqrf - : MVE_VCMPqr { + : MVE_VCMPqr { let Predicates = [HasMVEFloat]; } class MVE_VCMPqri size> - : MVE_VCMPqr { + : MVE_VCMPqr { let Inst{12} = 0b0; let Inst{5} = 0b0; } class MVE_VCMPqru size> - : MVE_VCMPqr { + : MVE_VCMPqr { let Inst{12} = 0b0; let Inst{5} = 0b1; } class MVE_VCMPqrs size> - : MVE_VCMPqr { + : MVE_VCMPqr { let Inst{12} = 0b1; } @@ -4490,9 +4499,9 @@ let Predicates = [HasMVEInt] in { class MVE_qDest_qSrc pattern=[]> + bits<2> vecsize, list pattern=[]> : MVE_p { + ops, vpred, cstr, vecsize, pattern> { bits<4> Qd; bits<4> Qm; @@ -4507,10 +4516,11 @@ class MVE_qDest_qSrc size, string cstr="", list pattern=[]> + string suffix, bits<2> size, string cstr="", + list pattern=[]> : MVE_qDest_qSrc { + vpred_n, "$Qd = $Qd_src"#cstr, size, pattern> { bits<4> Qn; let Inst{28} = subtract; @@ -4560,14 +4570,15 @@ defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>; defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>; defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>; -class MVE_VCMUL +class MVE_VCMUL size, string cstr=""> : MVE_qDest_qSrc { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, size, + []> { bits<4> Qn; bits<2> rot; - let Inst{28} = size; + let Inst{28} = size{1}; let Inst{21-20} = 0b11; let Inst{19-17} = Qn{2-0}; let Inst{16} = 0b0; @@ -4580,8 +4591,8 @@ class MVE_VCMUL } multiclass MVE_VCMUL_m { - def "" : MVE_VCMUL; + string cstr=""> { + def "" : MVE_VCMUL; defvar Inst = !cast(NAME); let Predicates = [HasMVEFloat] in { @@ -4601,14 +4612,14 @@ multiclass MVE_VCMUL_m; -defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, 0b1, "@earlyclobber $Qd">; +defm MVE_VCMULf16 : MVE_VCMUL_m<"vcmul", MVE_v8f16>; +defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, "@earlyclobber $Qd">; class MVE_VMULL bits_21_20, - bit T, string cstr, list pattern=[]> + bit T, string cstr, bits<2> vecsize, list 
pattern=[]> : MVE_qDest_qSrc { + vpred_r, cstr, vecsize, pattern> { bits<4> Qd; bits<4> Qn; bits<4> Qm; @@ -4627,9 +4638,9 @@ class MVE_VMULL bits_21_20, multiclass MVE_VMULL_m { + bit Top, bits<2> vecsize, string cstr=""> { def "" : MVE_VMULL<"vmull" # !if(Top, "t", "b"), VTI.Suffix, VTI.Unsigned, - VTI.Size, Top, cstr>; + VTI.Size, Top, cstr, vecsize>; defvar Inst = !cast(NAME); let Predicates = [HasMVEInt] in { @@ -4656,43 +4667,43 @@ multiclass MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b0, 0b01>; defm MVE_VMULLTs8 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b1, 0b01>; defm MVE_VMULLBs16 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b0, 0b10>; defm MVE_VMULLTs16 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b1, 0b10>; defm MVE_VMULLBs32 : MVE_VMULL_m; defm MVE_VMULLTs32 : MVE_VMULL_m; defm MVE_VMULLBu8 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b0, 0b01>; defm MVE_VMULLTu8 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b1, 0b01>; defm MVE_VMULLBu16 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b0, 0b10>; defm MVE_VMULLTu16 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b1, 0b10>; defm MVE_VMULLBu32 : MVE_VMULL_m; defm MVE_VMULLTu32 : MVE_VMULL_m; defm MVE_VMULLBp8 : MVE_VMULL_m; + int_arm_mve_mull_poly_predicated, 0b0, 0b01>; defm MVE_VMULLTp8 : MVE_VMULL_m; + int_arm_mve_mull_poly_predicated, 0b1, 0b01>; defm MVE_VMULLBp16 : MVE_VMULL_m; + int_arm_mve_mull_poly_predicated, 0b0, 0b10>; defm MVE_VMULLTp16 : MVE_VMULL_m; + int_arm_mve_mull_poly_predicated, 0b1, 0b10>; let Predicates = [HasMVEInt] in { def : Pat<(v2i64 (ARMvmulls (v4i32 MQPR:$src1), (v4i32 MQPR:$src2))), @@ -4742,7 +4753,7 @@ class MVE_VxMULH size, bit round, list pattern=[]> : MVE_qDest_qSrc { + vpred_r, "", size, pattern> { bits<4> Qn; let Inst{28} = U; @@ -4807,7 +4818,7 @@ class MVE_VxMOVxN size, bit T, list pattern=[]> : MVE_qDest_qSrc { + vpred_n, "$Qd = $Qd_src", !if(size, 0b10, 0b01), pattern> { let Inst{28} = bit_28; let Inst{21-20} = 0b11; @@ -4952,7 +4963,7 @@ class MVE_VCVT_ff : MVE_qDest_qSrc { + vpred, cstr, 0b10, []> { let Inst{28} = op; let Inst{21-16} = 0b111111; let Inst{12} = T; @@ -5015,7 +5026,7 @@ class MVE_VxCADD size, bit halve, string cstr=""> : MVE_qDest_qSrc { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, size, []> { bits<4> Qn; bit rot; @@ -5063,7 +5074,7 @@ class MVE_VADCSBC pattern=[]> : MVE_qDest_qSrc { + "$Qd, $Qn, $Qm", vpred_r, "", 0b10, pattern> { bits<4> Qn; let Inst{28} = subtract; @@ -5090,7 +5101,7 @@ class MVE_VQDMULL pattern=[]> : MVE_qDest_qSrc { + vpred_r, cstr, !if(size, 0b10, 0b01), pattern> { bits<4> Qn; let Inst{28} = size; @@ -5139,8 +5150,8 @@ defm MVE_VQDMULLs32 : MVE_VQDMULL_halves; // start of mve_qDest_rSrc class MVE_qr_base pattern=[]> - : MVE_p { + vpred_ops vpred, string cstr, bits<2> vecsize, list pattern=[]> + : MVE_p { bits<4> Qd; bits<4> Qn; bits<4> Rm; @@ -5156,19 +5167,19 @@ class MVE_qr_base pattern=[]> +class MVE_qDest_rSrc vecsize, list pattern=[]> : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm), - iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr, - pattern>; + iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr, + vecsize, pattern>; -class MVE_qDestSrc_rSrc pattern=[]> +class MVE_qDestSrc_rSrc vecsize, list pattern=[]> : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, rGPR:$Rm), - iname, suffix, "$Qd, $Qn, $Rm", vpred_n, "$Qd = $Qd_src", - pattern>; + iname, suffix, "$Qd, $Qn, $Rm", vpred_n, "$Qd = $Qd_src", + vecsize, pattern>; -class MVE_qDest_single_rSrc pattern=[]> +class MVE_qDest_single_rSrc vecsize, list 
pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, rGPR:$Rm), NoItinerary, iname, - suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", pattern> { + suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", vecsize, pattern> { bits<4> Qd; bits<4> Rm; @@ -5206,7 +5217,7 @@ multiclass MVE_vec_scalar_int_pat_m size, bit bit_5, bit bit_12, bit bit_16, bit bit_28> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = bit_28; let Inst{21-20} = size; @@ -5274,7 +5285,7 @@ defm MVE_VQSUB_qr_u32 : MVE_VQSUB_qr_m; class MVE_VQDMULL_qr pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = size; let Inst{21-20} = 0b11; @@ -5319,12 +5330,12 @@ defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves; defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves; class MVE_VxADDSUB_qr bits_21_20, bit subtract, - list pattern=[]> - : MVE_qDest_rSrc { + bit bit_28, bits<2> size, bit subtract, + bits<2> vecsize, list pattern=[]> + : MVE_qDest_rSrc { let Inst{28} = bit_28; - let Inst{21-20} = bits_21_20; + let Inst{21-20} = size; let Inst{16} = 0b0; let Inst{12} = subtract; let Inst{8} = 0b1; @@ -5334,7 +5345,7 @@ class MVE_VxADDSUB_qr { - def "" : MVE_VxADDSUB_qr; + def "" : MVE_VxADDSUB_qr; defm : MVE_vec_scalar_int_pat_m(NAME), VTI, unpred_int, pred_int, 1, 1>; } @@ -5363,7 +5374,7 @@ defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m; multiclass MVE_VADDSUB_qr_f { - def "" : MVE_VxADDSUB_qr; + def "" : MVE_VxADDSUB_qr; defm : MVE_TwoOpPatternDup(NAME)>; } @@ -5382,7 +5393,7 @@ let Predicates = [HasMVEFloat] in { class MVE_VxSHL_qr size, bit bit_7, bit bit_17, list pattern=[]> - : MVE_qDest_single_rSrc { + : MVE_qDest_single_rSrc { let Inst{28} = U; let Inst{25-23} = 0b100; @@ -5444,7 +5455,7 @@ let Predicates = [HasMVEInt] in { } class MVE_VBRSR size, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = 0b1; let Inst{21-20} = size; @@ -5494,7 +5505,7 @@ let Predicates = [HasMVEFloat] in { } class MVE_VMUL_qr_int size> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = 0b0; let Inst{21-20} = size; @@ -5518,11 +5529,11 @@ defm MVE_VMUL_qr_i16 : MVE_VMUL_qr_int_m; defm MVE_VMUL_qr_i32 : MVE_VMUL_qr_int_m; class MVE_VxxMUL_qr bits_21_20, list pattern=[]> - : MVE_qDest_rSrc { + bit bit_28, bits<2> size, bits<2> vecsize, list pattern=[]> + : MVE_qDest_rSrc { let Inst{28} = bit_28; - let Inst{21-20} = bits_21_20; + let Inst{21-20} = size; let Inst{16} = 0b1; let Inst{12} = 0b0; let Inst{8} = 0b0; @@ -5532,7 +5543,7 @@ class MVE_VxxMUL_qr { - def "" : MVE_VxxMUL_qr; + def "" : MVE_VxxMUL_qr; let Predicates = [HasMVEInt] in { defm : MVE_TwoOpPatternDup(NAME)>; @@ -5558,7 +5569,7 @@ defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m; multiclass MVE_VxxMUL_qr_f_m { let validForTailPredication = 1 in - def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11>; + def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11, VTI.Size>; defm : MVE_TwoOpPatternDup(NAME)>; } @@ -5570,8 +5581,8 @@ let Predicates = [HasMVEFloat] in { class MVE_VFMAMLA_qr bits_21_20, bit S, - list pattern=[]> - : MVE_qDestSrc_rSrc { + bits<2> vecsize, list pattern=[]> + : MVE_qDestSrc_rSrc { let Inst{28} = bit_28; let Inst{21-20} = bits_21_20; @@ -5586,7 +5597,7 @@ class MVE_VFMAMLA_qr { def "": MVE_VFMAMLA_qr; + scalar_addend, VTI.Size>; defvar Inst = !cast(NAME); defvar pred_int = !cast("int_arm_mve_" # iname # "_n_predicated"); defvar v1 = (VTI.Vec MQPR:$v1); @@ -5628,7 +5639,7 @@ defm MVE_VMLAS_qr_u32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4u32, 0b1>; multiclass MVE_VFMA_qr_multi { - def "": MVE_VFMAMLA_qr; + def "": MVE_VFMAMLA_qr; defvar Inst 
= !cast(NAME); defvar pred_int = int_arm_mve_fma_predicated; defvar v1 = (VTI.Vec MQPR:$v1); @@ -5677,7 +5688,7 @@ let Predicates = [HasMVEFloat] in { class MVE_VQDMLAH_qr size, bit bit_5, bit bit_12, list pattern=[]> - : MVE_qDestSrc_rSrc { + : MVE_qDestSrc_rSrc { let Inst{28} = U; let Inst{21-20} = size; @@ -5722,7 +5733,7 @@ class MVE_VxDUP size, bit bit_12, ValueType VT, SDPatternOperator vxdup> : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn), (ins tGPREven:$Rn_src, MVE_VIDUP_imm:$imm), NoItinerary, - iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src", + iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src", size, [(set (VT MQPR:$Qd), (i32 tGPREven:$Rn), (vxdup (i32 tGPREven:$Rn_src), (i32 imm:$imm)))]> { bits<4> Qd; @@ -5757,7 +5768,7 @@ class MVE_VxWDUP size, bit bit_12, list pattern=[]> : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn), (ins tGPREven:$Rn_src, tGPROdd:$Rm, MVE_VIDUP_imm:$imm), NoItinerary, - iname, suffix, "$Qd, $Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src", + iname, suffix, "$Qd, $Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src", size, pattern> { bits<4> Qd; bits<4> Rm; @@ -5792,7 +5803,7 @@ def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>; let isReMaterializable = 1 in class MVE_VCTPInst size, list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, - "$Rn", vpred_n, "", pattern> { + "$Rn", vpred_n, "", size, pattern> { bits<4> Rn; let Inst{28-27} = 0b10; @@ -5849,6 +5860,7 @@ class MVE_VMOV_64bit let Inst{4} = idx2; let Inst{3-0} = Rt{3-0}; + let VecSize = 0b10; let hasSideEffects = 0; } @@ -5937,7 +5949,7 @@ class MVE_vldst24_base stage, bits<2> size, bit load, dag Oops, dag loadIops, dag wbIops, string iname, string ops, string cstr, list pattern=[]> - : MVE_MI { + : MVE_MI { bits<4> VQd; bits<4> Rn; @@ -6135,8 +6147,8 @@ def MVE_memD: MVE_memsz<0b11, 3, ?, "d", ["", "u", "s", "f"]>; // input values. 
class MVE_VLDRSTR_base pattern=[]> - : MVE_p { + string ops, string cstr, bits<2> vecsize, list pattern=[]> + : MVE_p { bits<3> Qd; let Inst{28} = U; @@ -6172,7 +6184,7 @@ class MVE_VLDRSTR_base - : MVE_VLDRSTR_base { + : MVE_VLDRSTR_base { bits<12> addr; let Inst{23} = addr{7}; let Inst{19-16} = addr{11-8}; @@ -6187,7 +6199,7 @@ class MVE_VLDRSTR_cw size, dag oops, dag iops, string asm, string suffix, IndexMode im, string ops, string cstr> - : MVE_VLDRSTR_base { + : MVE_VLDRSTR_base { bits<11> addr; let Inst{23} = addr{7}; let Inst{19} = memsz.encoding{0}; // enough to tell 16- from 32-bit @@ -6304,7 +6316,7 @@ class MVE_VLDRSTR_rq size, bit os, string asm, string suffix, int shift> : MVE_VLDRSTR_base:$addr)), - asm, suffix, "$Qd, $addr", dir.cstr> { + asm, suffix, "$Qd, $addr", dir.cstr, size> { bits<7> addr; let Inst{23} = 0b1; let Inst{19-16} = addr{6-3}; @@ -6437,7 +6449,7 @@ class MVE_VLDRSTR_qi : MVE_VLDRSTR_base:$addr)), - asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr> { + asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr, memsz.encoding> { bits<11> addr; let Inst{23} = addr{7}; let Inst{19-17} = addr{10-8}; @@ -6546,7 +6558,7 @@ foreach suffix = memsz.suffixes in { // end of MVE predicable load/store class MVE_VPT size, dag iops, string asm, list pattern=[]> - : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", pattern> { + : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", size, pattern> { bits<3> fc; bits<4> Mk; bits<3> Qn; @@ -6656,7 +6668,7 @@ def MVE_VPTv16s8r : MVE_VPTt2s<"s8", 0b00>; class MVE_VPTf pattern=[]> : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, - "", pattern> { + "", !if(size, 0b01, 0b10), pattern> { bits<3> fc; bits<4> Mk; bits<3> Qn; @@ -6709,7 +6721,7 @@ def MVE_VPTv4f32r : MVE_VPTft2<"f32", 0b0>; def MVE_VPTv8f16r : MVE_VPTft2<"f16", 0b1>; def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary, - !strconcat("vpst", "${Mk}"), "", "", []> { + !strconcat("vpst", "${Mk}"), "", "", 0b00, []> { bits<4> Mk; let Inst{31-23} = 0b111111100; @@ -6726,7 +6738,7 @@ def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary, } def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary, - "vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", []> { + "vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", 0b00, []> { bits<4> Qn; bits<4> Qd; bits<4> Qm; @@ -6832,7 +6844,7 @@ let Predicates = [HasMVEFloat] in { } def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary, - "vpnot", "", "", vpred_n, "", []> { + "vpnot", "", "", vpred_n, "", 0b00, []> { let Inst{31-0} = 0b11111110001100010000111101001101; let Unpredictable{19-17} = 0b111; let Unpredictable{12} = 0b1; diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 579e7d4..23e87e1 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -97,7 +97,15 @@ static bool isDomainMVE(MachineInstr *MI) { return Domain == ARMII::DomainMVE; } +static int getVecSize(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + uint64_t Flags = MCID.TSFlags; + return (Flags & ARMII::VecSize) >> ARMII::VecSizeShift; +} + static bool shouldInspect(MachineInstr &MI) { + if (MI.isDebugInstr()) + return false; return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI); } @@ -371,6 +379,7 @@ namespace { SmallVector VCTPs; SmallPtrSet ToRemove; SmallPtrSet BlockMasksToRecompute; + 
SmallPtrSet DoubleWidthResultInstrs; bool Revert = false; bool CannotTailPredicate = false;
@@ -730,6 +739,20 @@ bool LowOverheadLoop::ValidateTailPredicate() { return false; } + // For any DoubleWidthResultInstrs we found whilst scanning instructions, they + // need to compute an output size that is no larger than what the VCTP mask + // operates on. The VecSize of a DoubleWidthResult is the larger vector size + // - the size it extends into - so the instruction is valid so long as its + // VecSize is less than or equal to the VCTP's VecSize. + unsigned VCTPVecSize = getVecSize(*VCTP); + for (MachineInstr *MI : DoubleWidthResultInstrs) { + unsigned InstrVecSize = getVecSize(*MI); + if (InstrVecSize > VCTPVecSize) { + LLVM_DEBUG(dbgs() << "ARM Loops: Double width result larger than VCTP " + << "VecSize:\n" << *MI); + return false; + } + } + // Check that the value change of the element count is what we expect and // that the predication will be equivalent. For this we need: // NumElements = NumElements - VectorWidth. The sub will be a sub immediate
@@ -1233,8 +1256,13 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) { bool RequiresExplicitPredication = (MCID.TSFlags & ARMII::ValidForTailPredication) == 0; if (isDomainMVE(MI) && RequiresExplicitPredication) { - LLVM_DEBUG(if (!IsUse) - dbgs() << "ARM Loops: Can't tail predicate: " << *MI); + if (!IsUse && producesDoubleWidthResult(*MI)) { + DoubleWidthResultInstrs.insert(MI); + return true; + } + + LLVM_DEBUG(if (!IsUse) dbgs() + << "ARM Loops: Can't tail predicate: " << *MI); return IsUse; }
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ecd9611..43f7575 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -408,6 +408,14 @@ namespace ARMII { // its input, typically reading from the top/bottom halves of the input(s). DoubleWidthResult = 1 << 23, + // The vector element size for MVE instructions. 00 = i8, 01 = i16, 10 = i32 + // and 11 = i64. This is the largest type if multiple are present, so an + // MVE_VMOVLs8bh is size 01=i16, as it extends from an i8 to an i16. There + // are some caveats so it cannot be used blindly, such as exchanging + // VMLADAVAs and complex instructions, which may use different input lanes. + VecSizeShift = 24, + VecSize = 3 << VecSizeShift, + //===------------------------------------------------------------------===// // Code domain.
DomainShift = 15, diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll index 1eff548..5ddbb3b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll @@ -10,22 +10,13 @@ define void @vmovl_s32(i32* noalias nocapture %d, i32* nocapture readonly %s, i3 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB0_1: @ %vector.ph -; CHECK-NEXT: adds r3, r2, #3 -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: sub.w r12, r3, #4 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrwt.u32 q0, [r1], #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vmovlb.s16 q0, q0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB0_2 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 +; CHECK-NEXT: letp lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: @@ -66,22 +57,13 @@ define void @vmovl_u16(i16* noalias nocapture %d, i16* nocapture readonly %s, i3 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB1_1: @ %vector.ph -; CHECK-NEXT: adds r3, r2, #7 -; CHECK-NEXT: bic r3, r3, #7 -; CHECK-NEXT: sub.w r12, r3, #8 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #3 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.16 r2 -; CHECK-NEXT: subs r2, #8 -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrht.u16 q0, [r1], #16 +; CHECK-NEXT: vldrh.u16 q0, [r1], #16 ; CHECK-NEXT: vmovlb.u8 q0, q0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB1_2 +; CHECK-NEXT: vstrh.16 q0, [r0], #16 +; CHECK-NEXT: letp lr, .LBB1_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: @@ -174,35 +156,22 @@ define void @sunken_vmovl(i8* noalias %pTarget, i16 signext %iTargetStride, i8* ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: ldrsh.w r1, [sp, #8] ; CHECK-NEXT: vmov.i16 q0, #0x100 -; CHECK-NEXT: cmp r1, #8 -; CHECK-NEXT: mov r3, r1 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r3, #8 ; CHECK-NEXT: vldrb.u16 q1, [r2], #8 -; CHECK-NEXT: subs r3, r1, r3 ; CHECK-NEXT: vldrb.u16 q2, [r0], #8 -; CHECK-NEXT: add.w r12, r3, #7 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r12, r3, r12, lsr #3 ; CHECK-NEXT: ldr r3, [sp, #12] -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: dlstp.16 lr, r1 ; CHECK-NEXT: .LBB3_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.16 r1 -; CHECK-NEXT: subs r1, #8 ; CHECK-NEXT: vmovlb.u8 q1, q1 -; CHECK-NEXT: vpst -; CHECK-NEXT: vsubt.i16 q3, q0, q1 +; CHECK-NEXT: vsub.i16 q3, q0, q1 ; CHECK-NEXT: vmovlb.u8 q2, q2 -; CHECK-NEXT: vpstttt -; CHECK-NEXT: vmult.i16 q3, q2, q3 -; CHECK-NEXT: vmlat.u16 q3, q1, r3 -; CHECK-NEXT: vshrt.u16 q3, q3, #8 -; CHECK-NEXT: vldrbt.u16 q1, [r2], #8 -; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrbt.u16 q2, [r0], #8 -; CHECK-NEXT: vstrbt.16 q3, [r0, #-16] -; CHECK-NEXT: le lr, .LBB3_1 +; CHECK-NEXT: vmul.i16 q3, q2, q3 +; CHECK-NEXT: vmla.u16 q3, q1, r3 +; CHECK-NEXT: vshr.u16 q3, q3, #8 +; CHECK-NEXT: vldrb.u16 q1, [r2], #8 +; CHECK-NEXT: vldrb.u16 q2, [r0], #8 +; CHECK-NEXT: vstrb.16 q3, [r0, #-16] +; CHECK-NEXT: letp lr, .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %do.end ; CHECK-NEXT: 
pop {r7, pc} entry: diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp index b9cfd9c..4113b79 100644 --- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp +++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp @@ -1216,3 +1216,886 @@ TEST(MachineInstr, HasSideEffects) { << MII->getName(Op) << " has unexpected side effects"; } } + +TEST(MachineInstr, MVEVecSize) { + using namespace ARM; + auto MVEVecSize = [](unsigned Opcode) { + switch (Opcode) { + default: + dbgs() << Opcode << "\n"; + llvm_unreachable("Unexpected MVE instruction!"); + case MVE_ASRLi: + case MVE_ASRLr: + case MVE_LSLLi: + case MVE_LSLLr: + case MVE_LSRL: + case MVE_SQRSHR: + case MVE_SQRSHRL: + case MVE_SQSHL: + case MVE_SQSHLL: + case MVE_SRSHR: + case MVE_SRSHRL: + case MVE_UQRSHL: + case MVE_UQRSHLL: + case MVE_UQSHL: + case MVE_UQSHLL: + case MVE_URSHR: + case MVE_URSHRL: + case MVE_VABAVs8: + case MVE_VABAVu8: + case MVE_VABDs8: + case MVE_VABDu8: + case MVE_VABSs8: + case MVE_VADDVs8acc: + case MVE_VADDVs8no_acc: + case MVE_VADDVu8acc: + case MVE_VADDVu8no_acc: + case MVE_VADD_qr_i8: + case MVE_VADDi8: + case MVE_VBRSR8: + case MVE_VCADDi8: + case MVE_VCLSs8: + case MVE_VCLZs8: + case MVE_VCMPi8: + case MVE_VCMPi8r: + case MVE_VCMPs8: + case MVE_VCMPs8r: + case MVE_VCMPu8: + case MVE_VCMPu8r: + case MVE_VCTP8: + case MVE_VDDUPu8: + case MVE_VDUP8: + case MVE_VDWDUPu8: + case MVE_VHADD_qr_s8: + case MVE_VHADD_qr_u8: + case MVE_VHADDs8: + case MVE_VHADDu8: + case MVE_VHCADDs8: + case MVE_VHSUB_qr_s8: + case MVE_VHSUB_qr_u8: + case MVE_VHSUBs8: + case MVE_VHSUBu8: + case MVE_VIDUPu8: + case MVE_VIWDUPu8: + case MVE_VLD20_8: + case MVE_VLD20_8_wb: + case MVE_VLD21_8: + case MVE_VLD21_8_wb: + case MVE_VLD40_8: + case MVE_VLD40_8_wb: + case MVE_VLD41_8: + case MVE_VLD41_8_wb: + case MVE_VLD42_8: + case MVE_VLD42_8_wb: + case MVE_VLD43_8: + case MVE_VLD43_8_wb: + case MVE_VLDRBU8: + case MVE_VLDRBU8_post: + case MVE_VLDRBU8_pre: + case MVE_VLDRBU8_rq: + case MVE_VMAXAVs8: + case MVE_VMAXAs8: + case MVE_VMAXVs8: + case MVE_VMAXVu8: + case MVE_VMAXs8: + case MVE_VMAXu8: + case MVE_VMINAVs8: + case MVE_VMINAs8: + case MVE_VMINVs8: + case MVE_VMINVu8: + case MVE_VMINs8: + case MVE_VMINu8: + case MVE_VMLADAVas8: + case MVE_VMLADAVau8: + case MVE_VMLADAVaxs8: + case MVE_VMLADAVs8: + case MVE_VMLADAVu8: + case MVE_VMLADAVxs8: + case MVE_VMLAS_qr_s8: + case MVE_VMLAS_qr_u8: + case MVE_VMLA_qr_s8: + case MVE_VMLA_qr_u8: + case MVE_VMLSDAVas8: + case MVE_VMLSDAVaxs8: + case MVE_VMLSDAVs8: + case MVE_VMLSDAVxs8: + case MVE_VMOV_from_lane_s8: + case MVE_VMOV_from_lane_u8: + case MVE_VMOV_to_lane_8: + case MVE_VMOVimmi8: + case MVE_VMULHs8: + case MVE_VMULHu8: + case MVE_VMUL_qr_i8: + case MVE_VMULi8: + case MVE_VNEGs8: + case MVE_VPTv16i8: + case MVE_VPTv16i8r: + case MVE_VPTv16s8: + case MVE_VPTv16s8r: + case MVE_VPTv16u8: + case MVE_VPTv16u8r: + case MVE_VQABSs8: + case MVE_VQADD_qr_s8: + case MVE_VQADD_qr_u8: + case MVE_VQADDs8: + case MVE_VQADDu8: + case MVE_VQDMLADHXs8: + case MVE_VQDMLADHs8: + case MVE_VQDMLAH_qrs8: + case MVE_VQDMLASH_qrs8: + case MVE_VQDMLSDHXs8: + case MVE_VQDMLSDHs8: + case MVE_VQDMULH_qr_s8: + case MVE_VQDMULHi8: + case MVE_VQNEGs8: + case MVE_VQRDMLADHXs8: + case MVE_VQRDMLADHs8: + case MVE_VQRDMLAH_qrs8: + case MVE_VQRDMLASH_qrs8: + case MVE_VQRDMLSDHXs8: + case MVE_VQRDMLSDHs8: + case MVE_VQRDMULH_qr_s8: + case MVE_VQRDMULHi8: + case MVE_VQRSHL_by_vecs8: + case MVE_VQRSHL_by_vecu8: + case MVE_VQRSHL_qrs8: + case MVE_VQRSHL_qru8: + case 
MVE_VQSHLU_imms8: + case MVE_VQSHL_by_vecs8: + case MVE_VQSHL_by_vecu8: + case MVE_VQSHL_qrs8: + case MVE_VQSHL_qru8: + case MVE_VQSHLimms8: + case MVE_VQSHLimmu8: + case MVE_VQSUB_qr_s8: + case MVE_VQSUB_qr_u8: + case MVE_VQSUBs8: + case MVE_VQSUBu8: + case MVE_VRHADDs8: + case MVE_VRHADDu8: + case MVE_VRMULHs8: + case MVE_VRMULHu8: + case MVE_VRSHL_by_vecs8: + case MVE_VRSHL_by_vecu8: + case MVE_VRSHL_qrs8: + case MVE_VRSHL_qru8: + case MVE_VRSHR_imms8: + case MVE_VRSHR_immu8: + case MVE_VSHL_by_vecs8: + case MVE_VSHL_by_vecu8: + case MVE_VSHL_immi8: + case MVE_VSHL_qru8: + case MVE_VSHL_qrs8: + case MVE_VSHR_imms8: + case MVE_VSHR_immu8: + case MVE_VSLIimm8: + case MVE_VSRIimm8: + case MVE_VST20_8: + case MVE_VST20_8_wb: + case MVE_VST21_8: + case MVE_VST21_8_wb: + case MVE_VST40_8: + case MVE_VST40_8_wb: + case MVE_VST41_8: + case MVE_VST41_8_wb: + case MVE_VST42_8: + case MVE_VST42_8_wb: + case MVE_VST43_8: + case MVE_VST43_8_wb: + case MVE_VSTRB8_rq: + case MVE_VSTRBU8: + case MVE_VSTRBU8_post: + case MVE_VSTRBU8_pre: + case MVE_VSUB_qr_i8: + case MVE_VSUBi8: + case MVE_VAND: + case MVE_VBIC: + case MVE_VEOR: + case MVE_VMVN: + case MVE_VORN: + case MVE_VORR: + case MVE_VPNOT: + case MVE_VPSEL: + case MVE_VPST: + return 0; + case MVE_VABAVs16: + case MVE_VABAVu16: + case MVE_VABDf16: + case MVE_VABDs16: + case MVE_VABDu16: + case MVE_VABSf16: + case MVE_VABSs16: + case MVE_VADDVs16acc: + case MVE_VADDVs16no_acc: + case MVE_VADDVu16acc: + case MVE_VADDVu16no_acc: + case MVE_VADD_qr_f16: + case MVE_VADD_qr_i16: + case MVE_VADDf16: + case MVE_VADDi16: + case MVE_VBICimmi16: + case MVE_VBRSR16: + case MVE_VCADDf16: + case MVE_VCADDi16: + case MVE_VCLSs16: + case MVE_VCLZs16: + case MVE_VCMLAf16: + case MVE_VCMPf16: + case MVE_VCMPf16r: + case MVE_VCMPi16: + case MVE_VCMPi16r: + case MVE_VCMPs16: + case MVE_VCMPs16r: + case MVE_VCMPu16: + case MVE_VCMPu16r: + case MVE_VCMULf16: + case MVE_VCTP16: + case MVE_VCVTf16s16_fix: + case MVE_VCVTf16s16n: + case MVE_VCVTf16u16_fix: + case MVE_VCVTf16u16n: + case MVE_VCVTs16f16_fix: + case MVE_VCVTs16f16a: + case MVE_VCVTs16f16m: + case MVE_VCVTs16f16n: + case MVE_VCVTs16f16p: + case MVE_VCVTs16f16z: + case MVE_VCVTu16f16_fix: + case MVE_VCVTu16f16a: + case MVE_VCVTu16f16m: + case MVE_VCVTu16f16n: + case MVE_VCVTu16f16p: + case MVE_VCVTu16f16z: + case MVE_VDDUPu16: + case MVE_VDUP16: + case MVE_VDWDUPu16: + case MVE_VFMA_qr_Sf16: + case MVE_VFMA_qr_f16: + case MVE_VFMAf16: + case MVE_VFMSf16: + case MVE_VHADD_qr_s16: + case MVE_VHADD_qr_u16: + case MVE_VHADDs16: + case MVE_VHADDu16: + case MVE_VHCADDs16: + case MVE_VHSUB_qr_s16: + case MVE_VHSUB_qr_u16: + case MVE_VHSUBs16: + case MVE_VHSUBu16: + case MVE_VIDUPu16: + case MVE_VIWDUPu16: + case MVE_VLD20_16: + case MVE_VLD20_16_wb: + case MVE_VLD21_16: + case MVE_VLD21_16_wb: + case MVE_VLD40_16: + case MVE_VLD40_16_wb: + case MVE_VLD41_16: + case MVE_VLD41_16_wb: + case MVE_VLD42_16: + case MVE_VLD42_16_wb: + case MVE_VLD43_16: + case MVE_VLD43_16_wb: + case MVE_VLDRBS16: + case MVE_VLDRBS16_post: + case MVE_VLDRBS16_pre: + case MVE_VLDRBS16_rq: + case MVE_VLDRBU16: + case MVE_VLDRBU16_post: + case MVE_VLDRBU16_pre: + case MVE_VLDRBU16_rq: + case MVE_VLDRHU16: + case MVE_VLDRHU16_post: + case MVE_VLDRHU16_pre: + case MVE_VLDRHU16_rq: + case MVE_VLDRHU16_rq_u: + case MVE_VMAXAVs16: + case MVE_VMAXAs16: + case MVE_VMAXNMAVf16: + case MVE_VMAXNMAf16: + case MVE_VMAXNMVf16: + case MVE_VMAXNMf16: + case MVE_VMAXVs16: + case MVE_VMAXVu16: + case MVE_VMAXs16: + case MVE_VMAXu16: + case MVE_VMINAVs16: + 
case MVE_VMINAs16: + case MVE_VMINNMAVf16: + case MVE_VMINNMAf16: + case MVE_VMINNMVf16: + case MVE_VMINNMf16: + case MVE_VMINVs16: + case MVE_VMINVu16: + case MVE_VMINs16: + case MVE_VMINu16: + case MVE_VMLADAVas16: + case MVE_VMLADAVau16: + case MVE_VMLADAVaxs16: + case MVE_VMLADAVs16: + case MVE_VMLADAVu16: + case MVE_VMLADAVxs16: + case MVE_VMLALDAVas16: + case MVE_VMLALDAVau16: + case MVE_VMLALDAVaxs16: + case MVE_VMLALDAVs16: + case MVE_VMLALDAVu16: + case MVE_VMLALDAVxs16: + case MVE_VMLAS_qr_s16: + case MVE_VMLAS_qr_u16: + case MVE_VMLA_qr_s16: + case MVE_VMLA_qr_u16: + case MVE_VMLSDAVas16: + case MVE_VMLSDAVaxs16: + case MVE_VMLSDAVs16: + case MVE_VMLSDAVxs16: + case MVE_VMLSLDAVas16: + case MVE_VMLSLDAVaxs16: + case MVE_VMLSLDAVs16: + case MVE_VMLSLDAVxs16: + case MVE_VMOVNi16bh: + case MVE_VMOVNi16th: + case MVE_VMOV_from_lane_s16: + case MVE_VMOV_from_lane_u16: + case MVE_VMOV_to_lane_16: + case MVE_VMOVimmi16: + case MVE_VMOVLs8bh: + case MVE_VMOVLs8th: + case MVE_VMOVLu8bh: + case MVE_VMOVLu8th: + case MVE_VMULLBp8: + case MVE_VMULLBs8: + case MVE_VMULLBu8: + case MVE_VMULLTp8: + case MVE_VMULLTs8: + case MVE_VMULLTu8: + case MVE_VMULHs16: + case MVE_VMULHu16: + case MVE_VMUL_qr_f16: + case MVE_VMUL_qr_i16: + case MVE_VMULf16: + case MVE_VMULi16: + case MVE_VMVNimmi16: + case MVE_VNEGf16: + case MVE_VNEGs16: + case MVE_VORRimmi16: + case MVE_VPTv8f16: + case MVE_VPTv8f16r: + case MVE_VPTv8i16: + case MVE_VPTv8i16r: + case MVE_VPTv8s16: + case MVE_VPTv8s16r: + case MVE_VPTv8u16: + case MVE_VPTv8u16r: + case MVE_VQABSs16: + case MVE_VQADD_qr_s16: + case MVE_VQADD_qr_u16: + case MVE_VQADDs16: + case MVE_VQADDu16: + case MVE_VQDMLADHXs16: + case MVE_VQDMLADHs16: + case MVE_VQDMLAH_qrs16: + case MVE_VQDMLASH_qrs16: + case MVE_VQDMLSDHXs16: + case MVE_VQDMLSDHs16: + case MVE_VQDMULH_qr_s16: + case MVE_VQDMULHi16: + case MVE_VQDMULL_qr_s16bh: + case MVE_VQDMULL_qr_s16th: + case MVE_VQDMULLs16bh: + case MVE_VQDMULLs16th: + case MVE_VQMOVNs16bh: + case MVE_VQMOVNs16th: + case MVE_VQMOVNu16bh: + case MVE_VQMOVNu16th: + case MVE_VQMOVUNs16bh: + case MVE_VQMOVUNs16th: + case MVE_VQNEGs16: + case MVE_VQRDMLADHXs16: + case MVE_VQRDMLADHs16: + case MVE_VQRDMLAH_qrs16: + case MVE_VQRDMLASH_qrs16: + case MVE_VQRDMLSDHXs16: + case MVE_VQRDMLSDHs16: + case MVE_VQRDMULH_qr_s16: + case MVE_VQRDMULHi16: + case MVE_VQRSHL_by_vecs16: + case MVE_VQRSHL_by_vecu16: + case MVE_VQRSHL_qrs16: + case MVE_VQRSHL_qru16: + case MVE_VQRSHRNbhs16: + case MVE_VQRSHRNbhu16: + case MVE_VQRSHRNths16: + case MVE_VQRSHRNthu16: + case MVE_VQRSHRUNs16bh: + case MVE_VQRSHRUNs16th: + case MVE_VQSHLU_imms16: + case MVE_VQSHL_by_vecs16: + case MVE_VQSHL_by_vecu16: + case MVE_VQSHL_qrs16: + case MVE_VQSHL_qru16: + case MVE_VQSHLimms16: + case MVE_VQSHLimmu16: + case MVE_VQSHRNbhs16: + case MVE_VQSHRNbhu16: + case MVE_VQSHRNths16: + case MVE_VQSHRNthu16: + case MVE_VQSHRUNs16bh: + case MVE_VQSHRUNs16th: + case MVE_VQSUB_qr_s16: + case MVE_VQSUB_qr_u16: + case MVE_VQSUBs16: + case MVE_VQSUBu16: + case MVE_VREV16_8: + case MVE_VRHADDs16: + case MVE_VRHADDu16: + case MVE_VRINTf16A: + case MVE_VRINTf16M: + case MVE_VRINTf16N: + case MVE_VRINTf16P: + case MVE_VRINTf16X: + case MVE_VRINTf16Z: + case MVE_VRMULHs16: + case MVE_VRMULHu16: + case MVE_VRSHL_by_vecs16: + case MVE_VRSHL_by_vecu16: + case MVE_VRSHL_qrs16: + case MVE_VRSHL_qru16: + case MVE_VRSHRNi16bh: + case MVE_VRSHRNi16th: + case MVE_VRSHR_imms16: + case MVE_VRSHR_immu16: + case MVE_VSHLL_imms8bh: + case MVE_VSHLL_imms8th: + case MVE_VSHLL_immu8bh: + case 
MVE_VSHLL_immu8th: + case MVE_VSHLL_lws8bh: + case MVE_VSHLL_lws8th: + case MVE_VSHLL_lwu8bh: + case MVE_VSHLL_lwu8th: + case MVE_VSHL_by_vecs16: + case MVE_VSHL_by_vecu16: + case MVE_VSHL_immi16: + case MVE_VSHL_qrs16: + case MVE_VSHL_qru16: + case MVE_VSHRNi16bh: + case MVE_VSHRNi16th: + case MVE_VSHR_imms16: + case MVE_VSHR_immu16: + case MVE_VSLIimm16: + case MVE_VSRIimm16: + case MVE_VST20_16: + case MVE_VST20_16_wb: + case MVE_VST21_16: + case MVE_VST21_16_wb: + case MVE_VST40_16: + case MVE_VST40_16_wb: + case MVE_VST41_16: + case MVE_VST41_16_wb: + case MVE_VST42_16: + case MVE_VST42_16_wb: + case MVE_VST43_16: + case MVE_VST43_16_wb: + case MVE_VSTRB16: + case MVE_VSTRB16_post: + case MVE_VSTRB16_pre: + case MVE_VSTRB16_rq: + case MVE_VSTRH16_rq: + case MVE_VSTRH16_rq_u: + case MVE_VSTRHU16: + case MVE_VSTRHU16_post: + case MVE_VSTRHU16_pre: + case MVE_VSUB_qr_f16: + case MVE_VSUB_qr_i16: + case MVE_VSUBf16: + case MVE_VSUBi16: + return 1; + case MVE_VABAVs32: + case MVE_VABAVu32: + case MVE_VABDf32: + case MVE_VABDs32: + case MVE_VABDu32: + case MVE_VABSf32: + case MVE_VABSs32: + case MVE_VADC: + case MVE_VADCI: + case MVE_VADDLVs32acc: + case MVE_VADDLVs32no_acc: + case MVE_VADDLVu32acc: + case MVE_VADDLVu32no_acc: + case MVE_VADDVs32acc: + case MVE_VADDVs32no_acc: + case MVE_VADDVu32acc: + case MVE_VADDVu32no_acc: + case MVE_VADD_qr_f32: + case MVE_VADD_qr_i32: + case MVE_VADDf32: + case MVE_VADDi32: + case MVE_VBICimmi32: + case MVE_VBRSR32: + case MVE_VCADDf32: + case MVE_VCADDi32: + case MVE_VCLSs32: + case MVE_VCLZs32: + case MVE_VCMLAf32: + case MVE_VCMPf32: + case MVE_VCMPf32r: + case MVE_VCMPi32: + case MVE_VCMPi32r: + case MVE_VCMPs32: + case MVE_VCMPs32r: + case MVE_VCMPu32: + case MVE_VCMPu32r: + case MVE_VCMULf32: + case MVE_VCTP32: + case MVE_VCVTf16f32bh: + case MVE_VCVTf16f32th: + case MVE_VCVTf32f16bh: + case MVE_VCVTf32f16th: + case MVE_VCVTf32s32_fix: + case MVE_VCVTf32s32n: + case MVE_VCVTf32u32_fix: + case MVE_VCVTf32u32n: + case MVE_VCVTs32f32_fix: + case MVE_VCVTs32f32a: + case MVE_VCVTs32f32m: + case MVE_VCVTs32f32n: + case MVE_VCVTs32f32p: + case MVE_VCVTs32f32z: + case MVE_VCVTu32f32_fix: + case MVE_VCVTu32f32a: + case MVE_VCVTu32f32m: + case MVE_VCVTu32f32n: + case MVE_VCVTu32f32p: + case MVE_VCVTu32f32z: + case MVE_VDDUPu32: + case MVE_VDUP32: + case MVE_VDWDUPu32: + case MVE_VFMA_qr_Sf32: + case MVE_VFMA_qr_f32: + case MVE_VFMAf32: + case MVE_VFMSf32: + case MVE_VHADD_qr_s32: + case MVE_VHADD_qr_u32: + case MVE_VHADDs32: + case MVE_VHADDu32: + case MVE_VHCADDs32: + case MVE_VHSUB_qr_s32: + case MVE_VHSUB_qr_u32: + case MVE_VHSUBs32: + case MVE_VHSUBu32: + case MVE_VIDUPu32: + case MVE_VIWDUPu32: + case MVE_VLD20_32: + case MVE_VLD20_32_wb: + case MVE_VLD21_32: + case MVE_VLD21_32_wb: + case MVE_VLD40_32: + case MVE_VLD40_32_wb: + case MVE_VLD41_32: + case MVE_VLD41_32_wb: + case MVE_VLD42_32: + case MVE_VLD42_32_wb: + case MVE_VLD43_32: + case MVE_VLD43_32_wb: + case MVE_VLDRBS32: + case MVE_VLDRBS32_post: + case MVE_VLDRBS32_pre: + case MVE_VLDRBS32_rq: + case MVE_VLDRBU32: + case MVE_VLDRBU32_post: + case MVE_VLDRBU32_pre: + case MVE_VLDRBU32_rq: + case MVE_VLDRHS32: + case MVE_VLDRHS32_post: + case MVE_VLDRHS32_pre: + case MVE_VLDRHS32_rq: + case MVE_VLDRHS32_rq_u: + case MVE_VLDRHU32: + case MVE_VLDRHU32_post: + case MVE_VLDRHU32_pre: + case MVE_VLDRHU32_rq: + case MVE_VLDRHU32_rq_u: + case MVE_VLDRWU32: + case MVE_VLDRWU32_post: + case MVE_VLDRWU32_pre: + case MVE_VLDRWU32_qi: + case MVE_VLDRWU32_qi_pre: + case MVE_VLDRWU32_rq: + case 
MVE_VLDRWU32_rq_u: + case MVE_VMAXAVs32: + case MVE_VMAXAs32: + case MVE_VMAXNMAVf32: + case MVE_VMAXNMAf32: + case MVE_VMAXNMVf32: + case MVE_VMAXNMf32: + case MVE_VMAXVs32: + case MVE_VMAXVu32: + case MVE_VMAXs32: + case MVE_VMAXu32: + case MVE_VMINAVs32: + case MVE_VMINAs32: + case MVE_VMINNMAVf32: + case MVE_VMINNMAf32: + case MVE_VMINNMVf32: + case MVE_VMINNMf32: + case MVE_VMINVs32: + case MVE_VMINVu32: + case MVE_VMINs32: + case MVE_VMINu32: + case MVE_VMLADAVas32: + case MVE_VMLADAVau32: + case MVE_VMLADAVaxs32: + case MVE_VMLADAVs32: + case MVE_VMLADAVu32: + case MVE_VMLADAVxs32: + case MVE_VMLALDAVas32: + case MVE_VMLALDAVau32: + case MVE_VMLALDAVaxs32: + case MVE_VMLALDAVs32: + case MVE_VMLALDAVu32: + case MVE_VMLALDAVxs32: + case MVE_VMLAS_qr_s32: + case MVE_VMLAS_qr_u32: + case MVE_VMLA_qr_s32: + case MVE_VMLA_qr_u32: + case MVE_VMLSDAVas32: + case MVE_VMLSDAVaxs32: + case MVE_VMLSDAVs32: + case MVE_VMLSDAVxs32: + case MVE_VMLSLDAVas32: + case MVE_VMLSLDAVaxs32: + case MVE_VMLSLDAVs32: + case MVE_VMLSLDAVxs32: + case MVE_VMOVNi32bh: + case MVE_VMOVNi32th: + case MVE_VMOV_from_lane_32: + case MVE_VMOV_q_rr: + case MVE_VMOV_rr_q: + case MVE_VMOV_to_lane_32: + case MVE_VMOVimmf32: + case MVE_VMOVimmi32: + case MVE_VMOVLs16bh: + case MVE_VMOVLs16th: + case MVE_VMOVLu16bh: + case MVE_VMOVLu16th: + case MVE_VMULHs32: + case MVE_VMULHu32: + case MVE_VMULLBp16: + case MVE_VMULLBs16: + case MVE_VMULLBu16: + case MVE_VMULLTp16: + case MVE_VMULLTs16: + case MVE_VMULLTu16: + case MVE_VMUL_qr_f32: + case MVE_VMUL_qr_i32: + case MVE_VMULf32: + case MVE_VMULi32: + case MVE_VMVNimmi32: + case MVE_VNEGf32: + case MVE_VNEGs32: + case MVE_VORRimmi32: + case MVE_VPTv4f32: + case MVE_VPTv4f32r: + case MVE_VPTv4i32: + case MVE_VPTv4i32r: + case MVE_VPTv4s32: + case MVE_VPTv4s32r: + case MVE_VPTv4u32: + case MVE_VPTv4u32r: + case MVE_VQABSs32: + case MVE_VQADD_qr_s32: + case MVE_VQADD_qr_u32: + case MVE_VQADDs32: + case MVE_VQADDu32: + case MVE_VQDMLADHXs32: + case MVE_VQDMLADHs32: + case MVE_VQDMLAH_qrs32: + case MVE_VQDMLASH_qrs32: + case MVE_VQDMLSDHXs32: + case MVE_VQDMLSDHs32: + case MVE_VQDMULH_qr_s32: + case MVE_VQDMULHi32: + case MVE_VQDMULL_qr_s32bh: + case MVE_VQDMULL_qr_s32th: + case MVE_VQDMULLs32bh: + case MVE_VQDMULLs32th: + case MVE_VQMOVNs32bh: + case MVE_VQMOVNs32th: + case MVE_VQMOVNu32bh: + case MVE_VQMOVNu32th: + case MVE_VQMOVUNs32bh: + case MVE_VQMOVUNs32th: + case MVE_VQNEGs32: + case MVE_VQRDMLADHXs32: + case MVE_VQRDMLADHs32: + case MVE_VQRDMLAH_qrs32: + case MVE_VQRDMLASH_qrs32: + case MVE_VQRDMLSDHXs32: + case MVE_VQRDMLSDHs32: + case MVE_VQRDMULH_qr_s32: + case MVE_VQRDMULHi32: + case MVE_VQRSHL_by_vecs32: + case MVE_VQRSHL_by_vecu32: + case MVE_VQRSHL_qrs32: + case MVE_VQRSHL_qru32: + case MVE_VQRSHRNbhs32: + case MVE_VQRSHRNbhu32: + case MVE_VQRSHRNths32: + case MVE_VQRSHRNthu32: + case MVE_VQRSHRUNs32bh: + case MVE_VQRSHRUNs32th: + case MVE_VQSHLU_imms32: + case MVE_VQSHL_by_vecs32: + case MVE_VQSHL_by_vecu32: + case MVE_VQSHL_qrs32: + case MVE_VQSHL_qru32: + case MVE_VQSHLimms32: + case MVE_VQSHLimmu32: + case MVE_VQSHRNbhs32: + case MVE_VQSHRNbhu32: + case MVE_VQSHRNths32: + case MVE_VQSHRNthu32: + case MVE_VQSHRUNs32bh: + case MVE_VQSHRUNs32th: + case MVE_VQSUB_qr_s32: + case MVE_VQSUB_qr_u32: + case MVE_VQSUBs32: + case MVE_VQSUBu32: + case MVE_VREV32_16: + case MVE_VREV32_8: + case MVE_VRHADDs32: + case MVE_VRHADDu32: + case MVE_VRINTf32A: + case MVE_VRINTf32M: + case MVE_VRINTf32N: + case MVE_VRINTf32P: + case MVE_VRINTf32X: + case MVE_VRINTf32Z: + case 
MVE_VRMLALDAVHas32: + case MVE_VRMLALDAVHau32: + case MVE_VRMLALDAVHaxs32: + case MVE_VRMLALDAVHs32: + case MVE_VRMLALDAVHu32: + case MVE_VRMLALDAVHxs32: + case MVE_VRMLSLDAVHas32: + case MVE_VRMLSLDAVHaxs32: + case MVE_VRMLSLDAVHs32: + case MVE_VRMLSLDAVHxs32: + case MVE_VRMULHs32: + case MVE_VRMULHu32: + case MVE_VRSHL_by_vecs32: + case MVE_VRSHL_by_vecu32: + case MVE_VRSHL_qrs32: + case MVE_VRSHL_qru32: + case MVE_VRSHRNi32bh: + case MVE_VRSHRNi32th: + case MVE_VRSHR_imms32: + case MVE_VRSHR_immu32: + case MVE_VSBC: + case MVE_VSBCI: + case MVE_VSHLC: + case MVE_VSHLL_imms16bh: + case MVE_VSHLL_imms16th: + case MVE_VSHLL_immu16bh: + case MVE_VSHLL_immu16th: + case MVE_VSHLL_lws16bh: + case MVE_VSHLL_lws16th: + case MVE_VSHLL_lwu16bh: + case MVE_VSHLL_lwu16th: + case MVE_VSHL_by_vecs32: + case MVE_VSHL_by_vecu32: + case MVE_VSHL_immi32: + case MVE_VSHL_qrs32: + case MVE_VSHL_qru32: + case MVE_VSHRNi32bh: + case MVE_VSHRNi32th: + case MVE_VSHR_imms32: + case MVE_VSHR_immu32: + case MVE_VSLIimm32: + case MVE_VSRIimm32: + case MVE_VST20_32: + case MVE_VST20_32_wb: + case MVE_VST21_32: + case MVE_VST21_32_wb: + case MVE_VST40_32: + case MVE_VST40_32_wb: + case MVE_VST41_32: + case MVE_VST41_32_wb: + case MVE_VST42_32: + case MVE_VST42_32_wb: + case MVE_VST43_32: + case MVE_VST43_32_wb: + case MVE_VSTRB32: + case MVE_VSTRB32_post: + case MVE_VSTRB32_pre: + case MVE_VSTRB32_rq: + case MVE_VSTRH32: + case MVE_VSTRH32_post: + case MVE_VSTRH32_pre: + case MVE_VSTRH32_rq: + case MVE_VSTRH32_rq_u: + case MVE_VSTRW32_qi: + case MVE_VSTRW32_qi_pre: + case MVE_VSTRW32_rq: + case MVE_VSTRW32_rq_u: + case MVE_VSTRWU32: + case MVE_VSTRWU32_post: + case MVE_VSTRWU32_pre: + case MVE_VSUB_qr_f32: + case MVE_VSUB_qr_i32: + case MVE_VSUBf32: + case MVE_VSUBi32: + return 2; + case MVE_VCTP64: + case MVE_VLDRDU64_qi: + case MVE_VLDRDU64_qi_pre: + case MVE_VLDRDU64_rq: + case MVE_VLDRDU64_rq_u: + case MVE_VMULLBs32: + case MVE_VMULLBu32: + case MVE_VMULLTs32: + case MVE_VMULLTu32: + case MVE_VMOVimmi64: + case MVE_VREV64_16: + case MVE_VREV64_32: + case MVE_VREV64_8: + case MVE_VSTRD64_qi: + case MVE_VSTRD64_qi_pre: + case MVE_VSTRD64_rq: + case MVE_VSTRD64_rq_u: + return 3; + } + }; + LLVMInitializeARMTargetInfo(); + LLVMInitializeARMTarget(); + LLVMInitializeARMTargetMC(); + + auto TT(Triple::normalize("thumbv8.1m.main-none-none-eabi")); + std::string Error; + const Target *T = TargetRegistry::lookupTarget(TT, Error); + if (!T) { + dbgs() << Error; + return; + } + + TargetOptions Options; + auto TM = std::unique_ptr( + static_cast( + T->createTargetMachine(TT, "generic", "", Options, None, None, + CodeGenOpt::Default))); + ARMSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), + std::string(TM->getTargetFeatureString()), + *static_cast(TM.get()), false); + + auto MII = TM->getMCInstrInfo(); + for (unsigned i = 0; i < ARM::INSTRUCTION_LIST_END; ++i) { + uint64_t Flags = MII->get(i).TSFlags; + if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE) + continue; + int Size = (Flags & ARMII::VecSize) >> ARMII::VecSizeShift; + ASSERT_EQ(MVEVecSize(i), Size) + << MII->getName(i) + << ": mismatched expectation for MVE vec size\n"; + } +} \ No newline at end of file -- 2.7.4
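
For reference, the vmovl_s32 codegen change above corresponds to a source loop of roughly the following shape (an illustrative sketch of the sign-extend-inreg pattern described in the commit message; the IR bodies are elided in the hunk, so the exact input is not shown here):

  // Truncating an i32 load to i16 and sign-extending back to i32 becomes a
  // vmovlb.s16 in the vector body, which this patch now permits inside a
  // dlstp/letp tail-predicated loop instead of forcing vctp/vpst sequences.
  void vmovl_s32(int *d, const int *s, int n) {
    for (int i = 0; i < n; i++)
      d[i] = static_cast<short>(s[i]);
  }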
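
The unit test asserts the encoded element size for every MVE opcode, from 0 for the MVE_VCTP8 group up to 3 for the MVE_VCTP64 group. A consumer such as the low-overhead-loops pass can decode the field with the same ARMII::VecSize mask and ARMII::VecSizeShift used in the test loop. A minimal sketch of that decode and of the size check the patch relies on — getMVEVecSizeInBits and vmovlFitsUnderVCTP are hypothetical helper names, not the patch's own functions:

  #include "MCTargetDesc/ARMBaseInfo.h"
  #include "llvm/CodeGen/MachineInstr.h"

  using namespace llvm;

  // Decode the element size in bits from the two VecSize TSFlag bits,
  // matching the groups asserted in the test above: 0 -> 8, 1 -> 16,
  // 2 -> 32, 3 -> 64.
  static unsigned getMVEVecSizeInBits(const MachineInstr &MI) {
    uint64_t Flags = MI.getDesc().TSFlags;
    unsigned Encoded = (Flags & ARMII::VecSize) >> ARMII::VecSizeShift;
    return 8u << Encoded;
  }

  // A VMOVL acting as an extend-inreg is tolerable under tail predication
  // when the element size it extends into is no wider than the elements
  // predicated by the loop's VCTP (e.g. vmovlb.s16 producing i32 lanes
  // inside a vctp.32 loop, as in the vmovl_s32 test).
  static bool vmovlFitsUnderVCTP(const MachineInstr &VMOVL,
                                 unsigned VCTPElementBits) {
    return getMVEVecSizeInBits(VMOVL) <= VCTPElementBits;
  }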