From a8d63dc2896a555a5efa19dea312f245fdf10ba2 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Sat, 5 Nov 2016 15:01:38 +0000 Subject: [PATCH] [Hexagon] Split all selection patterns into a separate file This is just the basic separation, without any cleanup. Further changes will follow. llvm-svn: 286036 --- llvm/lib/Target/Hexagon/Hexagon.td | 1 + llvm/lib/Target/Hexagon/HexagonInstrInfo.td | 1002 +------ llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td | 60 +- llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td | 1145 +------ llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td | 542 +--- llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td | 219 -- llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td | 414 +-- llvm/lib/Target/Hexagon/HexagonPatterns.td | 3273 +++++++++++++++++++++ 8 files changed, 3359 insertions(+), 3297 deletions(-) create mode 100644 llvm/lib/Target/Hexagon/HexagonPatterns.td diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td index f6a2e4f..5e083f0 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.td +++ b/llvm/lib/Target/Hexagon/Hexagon.td @@ -250,6 +250,7 @@ include "HexagonSchedule.td" include "HexagonRegisterInfo.td" include "HexagonCallingConv.td" include "HexagonInstrInfo.td" +include "HexagonPatterns.td" include "HexagonIntrinsics.td" include "HexagonIntrinsicsDerived.td" diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonInstrInfo.td index bf26987..97aedaf 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.td @@ -14,54 +14,6 @@ include "HexagonInstrFormats.td" include "HexagonOperands.td" include "HexagonInstrEnc.td" -// Pattern fragment that combines the value type and the register class -// into a single parameter. -// The pat frags in the definitions below need to have a named register, -// otherwise i32 will be assumed regardless of the register class. The -// name of the register does not matter. 
-def I1 : PatLeaf<(i1 PredRegs:$R)>; -def I32 : PatLeaf<(i32 IntRegs:$R)>; -def I64 : PatLeaf<(i64 DoubleRegs:$R)>; -def F32 : PatLeaf<(f32 IntRegs:$R)>; -def F64 : PatLeaf<(f64 DoubleRegs:$R)>; - -// Pattern fragments to extract the low and high subregisters from a -// 64-bit value. -def LoReg: OutPatFrag<(ops node:$Rs), - (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>; -def HiReg: OutPatFrag<(ops node:$Rs), - (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>; - -def orisadd: PatFrag<(ops node:$Addr, node:$off), - (or node:$Addr, node:$off), [{ return orIsAdd(N); }]>; - -// SDNode for converting immediate C to C-1. -def DEC_CONST_SIGNED : SDNodeXFormgetSExtValue(); - return XformSToSM1Imm(imm, SDLoc(N)); -}]>; - -// SDNode for converting immediate C to C-2. -def DEC2_CONST_SIGNED : SDNodeXFormgetSExtValue(); - return XformSToSM2Imm(imm, SDLoc(N)); -}]>; - -// SDNode for converting immediate C to C-3. -def DEC3_CONST_SIGNED : SDNodeXFormgetSExtValue(); - return XformSToSM3Imm(imm, SDLoc(N)); -}]>; - -// SDNode for converting immediate C to C-1. -def DEC_CONST_UNSIGNED : SDNodeXFormgetZExtValue(); - return XformUToUM1Imm(imm, SDLoc(N)); -}]>; //===----------------------------------------------------------------------===// // Compare @@ -96,28 +48,11 @@ def C2_cmpeqi : T_CMP <"cmp.eq", 0b00, 0, s10_0Ext>; def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10_0Ext>; def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9_0Ext>; -class T_CMP_pat - : Pat<(i1 (OpNode (i32 IntRegs:$src1), ImmPred:$src2)), - (MI IntRegs:$src1, ImmPred:$src2)>; - -def : T_CMP_pat ; -def : T_CMP_pat ; -def : T_CMP_pat ; - //===----------------------------------------------------------------------===// // ALU32/ALU + //===----------------------------------------------------------------------===// // Add. 
-def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; -def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; - -def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, - [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; - -def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; -def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; - let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in class T_ALU32_3op MajOp, bits<3> MinOp, bit OpsRev, bit IsComm> @@ -227,17 +162,6 @@ defm or : T_ALU32_3op_A2<"or", 0b001, 0b001, 0, 1>; defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>; defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>; -// Pats for instruction selection. -class BinOp32_pat - : Pat<(ResT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), - (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>; - -def: BinOp32_pat; -def: BinOp32_pat; -def: BinOp32_pat; -def: BinOp32_pat; -def: BinOp32_pat; - // A few special cases producing register pairs: let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in { def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>; @@ -252,9 +176,6 @@ let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in { def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>; } -def: BinOp32_pat; -def: BinOp32_pat; - let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in class T_ALU32_3op_cmp MinOp, bit IsNeg, bit IsComm> : ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), @@ -282,23 +203,6 @@ let Itinerary = ALU32_3op_tc_2early_SLOT0123 in { def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>; } -// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones -// that reverse the order of the operands. -class RevCmp : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>; - -// Pats for compares. They use PatFrags as operands, not SDNodes, -// since seteq/setgt/etc. are defined as ParFrags. 
-class T_cmp32_rr_pat - : Pat<(VT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), - (VT (MI IntRegs:$Rs, IntRegs:$Rt))>; - -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat; - -def: T_cmp32_rr_pat, i1>; -def: T_cmp32_rr_pat, i1>; - let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in def C2_mux: ALU32_rr<(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt), @@ -320,9 +224,6 @@ def C2_mux: ALU32_rr<(outs IntRegs:$Rd), let Inst{4-0} = Rd; } -def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), - (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>; - // Combines the two immediates into a double register. // Increase complexity to make it greater than any complexity of a combine // that involves a register. @@ -332,8 +233,7 @@ let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, AddedComplexity = 75 in def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8_0Ext:$s8, s8_0Imm:$S8), "$Rdd = combine(#$s8, #$S8)", - [(set (i64 DoubleRegs:$Rdd), - (i64 (HexagonCOMBINE(i32 s32_0ImmPred:$s8), (i32 s8_0ImmPred:$S8))))]> { + []> { bits<5> Rdd; bits<8> s8; bits<8> S8; @@ -418,9 +318,6 @@ multiclass Addri_base { defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel; -def: Pat<(i32 (add I32:$Rs, s32_0ImmPred:$s16)), - (i32 (A2_addi I32:$Rs, imm:$s16))>; - let hasNewValue = 1, hasSideEffects = 0, isPseudo = 1 in def A2_iconst : ALU32_ri <(outs IntRegs:$Rd), @@ -438,7 +335,7 @@ class T_ALU32ri_logical MinOp> : ALU32_ri <(outs IntRegs:$Rd), (ins IntRegs:$Rs, s10_0Ext:$s10), "$Rd = "#mnemonic#"($Rs, #$s10)" , - [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32_0ImmPred:$s10))]> { + []> { bits<5> Rd; bits<5> Rs; bits<10> s10; @@ -483,13 +380,6 @@ def A2_nop: ALU32Inst <(outs), (ins), "nop" > { let Inst{27-24} = 0b1111; } -def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs), - (A2_subri imm:$s10, IntRegs:$Rs)>; - -// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). 
-def: Pat<(not (i32 IntRegs:$src1)), - (A2_subri -1, IntRegs:$src1)>; - let hasSideEffects = 0, hasNewValue = 1 in class T_tfr16 : ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16_0Imm:$u16), @@ -631,7 +521,7 @@ let InputType = "imm", isExtendable = 1, isExtentSigned = 1, isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1, isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16_0Ext:$s16), "$Rd = #$s16", - [(set (i32 IntRegs:$Rd), s32_0ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>, + [], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredRel { bits<5> Rd; bits<16> s16; @@ -651,7 +541,7 @@ let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1, isAsmParserOnly = 1 in def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8_0Imm64:$src1), "$dst = #$src1", - [(set (i64 DoubleRegs:$dst), s8_0Imm64Pred:$src1)]>; + []>; // TODO: see if this instruction can be deleted.. let isExtendable = 1, opExtendable = 1, opExtentBits = 6, @@ -699,20 +589,13 @@ let opExtendable = 3 in def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8_0Ext:$s8), "$Rd = mux($Pu, $Rs, #$s8)">; -def : Pat<(i32 (select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs)), - (C2_muxri I1:$Pu, s32_0ImmPred:$s8, I32:$Rs)>; - -def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32_0ImmPred:$s8)), - (C2_muxir I1:$Pu, I32:$Rs, s32_0ImmPred:$s8)>; - // C2_muxii: Scalar mux immediates. 
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 2 in def C2_muxii: ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, s8_0Ext:$s8, s8_0Imm:$S8), "$Rd = mux($Pu, #$s8, #$S8)" , - [(set (i32 IntRegs:$Rd), - (i32 (select I1:$Pu, s32_0ImmPred:$s8, s8_0ImmPred:$S8)))] > { + []> { bits<5> Rd; bits<2> Pu; bits<8> s8; @@ -845,11 +728,6 @@ multiclass ZXTB_base minOp> { defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel; -def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>; -def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>; -def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>; -def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>; - //===----------------------------------------------------------------------===// // Template class for vector add and avg //===----------------------------------------------------------------------===// @@ -980,10 +858,6 @@ class T_vcmp minOp> let Inst{12-8} = Rtt; } -class T_vcmp_pat - : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))), - (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>; - // Vector compare bytes def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>; def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>; @@ -998,15 +872,6 @@ def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0b0000>; def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>; def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; - //===----------------------------------------------------------------------===// // ALU32/PERM - //===----------------------------------------------------------------------===// @@ -1112,23 +977,6 @@ let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in { def A2_addh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 0>; } -// Add halfword. 
-def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16), - (A2_addh_l16_ll I32:$src1, I32:$src2)>; - -def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)), - (A2_addh_l16_hl I32:$src1, I32:$src2)>; - -def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)), - (A2_addh_h16_ll I32:$src1, I32:$src2)>; - -// Subtract halfword. -def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16), - (A2_subh_l16_ll I32:$src1, I32:$src2)>; - -def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)), - (A2_subh_h16_ll I32:$src1, I32:$src2)>; - let hasSideEffects = 0, hasNewValue = 1 in def S2_parityp: ALU64Inst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt), @@ -1168,52 +1016,6 @@ def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >; def A2_max : T_XTYPE_MIN_MAX < 1, 0 >; def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >; -// Here, depending on the operand being selected, we'll either generate a -// min or max instruction. -// Ex: -// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected -// is the larger of two. So, the corresponding HexagonInst is passed in 'Inst'. -// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value -// is selected and the corresponding HexagonInst is passed in 'SwapInst'. 
- -multiclass T_MinMax_pats { - def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))), - (VT RC:$src1), (VT RC:$src2)), - (Inst RC:$src1, RC:$src2)>; - def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))), - (VT RC:$src2), (VT RC:$src1)), - (SwapInst RC:$src1, RC:$src2)>; -} - - -multiclass MinMax_pats { - defm: T_MinMax_pats; - - def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1), - (i32 PositiveHalfWord:$src2))), - (i32 PositiveHalfWord:$src1), - (i32 PositiveHalfWord:$src2))), i16), - (Inst IntRegs:$src1, IntRegs:$src2)>; - - def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1), - (i32 PositiveHalfWord:$src2))), - (i32 PositiveHalfWord:$src2), - (i32 PositiveHalfWord:$src1))), i16), - (SwapInst IntRegs:$src1, IntRegs:$src2)>; -} - -let AddedComplexity = 200 in { - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; -} - class T_cmp64_rr MinOp, bit IsComm> : ALU64_rr<(outs PredRegs:$Pd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt), "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", ALU64_tc_2early_SLOT23> { @@ -1237,16 +1039,6 @@ def C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0b000, 1>; def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>; def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>; -class T_cmp64_rr_pat - : Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))), - (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>; - -def: T_cmp64_rr_pat; -def: T_cmp64_rr_pat; -def: T_cmp64_rr_pat; -def: T_cmp64_rr_pat>; -def: T_cmp64_rr_pat>; - def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd), (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), "$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> { @@ -1296,9 +1088,6 @@ let isAdd = 1 in def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>; def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>; -def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>; -def: Pat<(i64 (sub I64:$Rs, 
I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>; - class T_ALU64_logical MinOp, bit OpsRev, bit IsComm, bit IsNeg> : T_ALU64_rr; def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>; def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>; -def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>; -def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>; -def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>; - //===----------------------------------------------------------------------===// // ALU64/ALU - //===----------------------------------------------------------------------===// @@ -1362,9 +1147,6 @@ def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>; def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>; def C2_not : T_LOGICAL_1OP<"not", 0b10>; -def: Pat<(i1 (not (i1 PredRegs:$Ps))), - (C2_not PredRegs:$Ps)>; - let hasSideEffects = 0 in class T_LOGICAL_2OP OpBits, bit IsNeg, bit Rev> : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps, PredRegs:$Pt), @@ -1390,12 +1172,6 @@ def C2_xor : T_LOGICAL_2OP<"xor", 0b010, 0, 0>; def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>; def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>; -def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>; - let hasSideEffects = 0, hasNewValue = 1 in def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt), "$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> { @@ -1432,10 +1208,6 @@ def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt), // JR + //===----------------------------------------------------------------------===// -def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; - class 
CondStr { string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") "; } @@ -1611,23 +1383,11 @@ multiclass JMPRpt_base { defm J2_jumpr : JMPRpt_base<"JMPr">; defm J2_jump : JMPpt_base<"JMP">; -def: Pat<(br bb:$dst), - (J2_jump brtarget:$dst)>; -def: Pat<(retflag), - (PS_jmpret (i32 R31))>; -def: Pat<(brcond (i1 PredRegs:$src1), bb:$offset), - (J2_jumpt PredRegs:$src1, bb:$offset)>; - // A return through builtin_eh_return. let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0, isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in def EH_RETURN_JMPR : T_JMPr; -def: Pat<(eh_return), - (EH_RETURN_JMPR (i32 R31))>; -def: Pat<(brind (i32 IntRegs:$dst)), - (J2_jumpr IntRegs:$dst)>; - //===----------------------------------------------------------------------===// // JR - //===----------------------------------------------------------------------===// @@ -1785,45 +1545,6 @@ def L2_loadalignh_io: T_loadalign_io <"memh_fifo", 0b0010, s11_1Ext>; let accessSize = ByteAccess, opExtentBits = 11 in def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>; -// Patterns to select load-indexed (i.e. load from base+offset). -multiclass Loadx_pat { - def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; - def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), - (VT (MI AddrFI:$fi, imm:$Off))>; - def: Pat<(VT (Load (orisadd (i32 AddrFI:$fi), ImmPred:$Off))), - (VT (MI AddrFI:$fi, imm:$Off))>; - def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))), - (VT (MI IntRegs:$Rs, imm:$Off))>; - def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>; -} - -let AddedComplexity = 20 in { - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - // No sextloadi1. 
-} - -// Sign-extending loads of i1 need to replicate the lowest bit throughout -// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should -// do the trick. -let AddedComplexity = 20 in -def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))), - (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; - //===----------------------------------------------------------------------===// // Post increment load //===----------------------------------------------------------------------===// @@ -2697,10 +2418,6 @@ def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>; def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">; def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">; -def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>; -def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>; -def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>; - let hasNewValue = 1, opNewValue = 0 in class T_MType_mpy_ri pattern> : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, ImmOp:$u8), @@ -2721,12 +2438,9 @@ class T_MType_mpy_ri pattern> } let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in -def M2_mpysip : T_MType_mpy_ri <0, u8_0Ext, - [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32_0ImmPred:$u8))]>; +def M2_mpysip : T_MType_mpy_ri <0, u8_0Ext, []>; -def M2_mpysin : T_MType_mpy_ri <1, u8_0Imm, - [(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs, - u8_0ImmPred:$u8)))]>; +def M2_mpysin : T_MType_mpy_ri <1, u8_0Imm, []>; // Assember mapped to M2_mpyi let isAsmParserOnly = 1 in @@ -2742,9 +2456,7 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9, CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1, isAsmParserOnly = 1 in def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9_0Ext:$src2), - "$dst = mpyi($src1, #$src2)", - [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), - s32_0ImmPred:$src2))]>, ImmRegRel; + "$dst = mpyi($src1, #$src2)", []>, 
ImmRegRel; let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3, InputType = "imm" in @@ -2793,28 +2505,16 @@ class T_MType_acc_rr MajOp, bits<3> MinOp, } let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in { - def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8_0Ext, - [(set (i32 IntRegs:$dst), - (add (mul IntRegs:$src2, u32_0ImmPred:$src3), - IntRegs:$src1))]>, ImmRegRel; - - def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, - [(set (i32 IntRegs:$dst), - (add (mul IntRegs:$src2, IntRegs:$src3), - IntRegs:$src1))]>, ImmRegRel; + def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8_0Ext, []>, ImmRegRel; + + def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, []>, ImmRegRel; } let CextOpcode = "ADD_acc" in { let isExtentSigned = 1 in - def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8_0Ext, - [(set (i32 IntRegs:$dst), - (add (add (i32 IntRegs:$src2), s32_0ImmPred:$src3), - (i32 IntRegs:$src1)))]>, ImmRegRel; - - def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, - [(set (i32 IntRegs:$dst), - (add (add (i32 IntRegs:$src2), (i32 IntRegs:$src3)), - (i32 IntRegs:$src1)))]>, ImmRegRel; + def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8_0Ext, []>, ImmRegRel; + + def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, []>, ImmRegRel; } let CextOpcode = "SUB_acc" in { @@ -2830,21 +2530,6 @@ def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8_0Ext>; def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>; def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>; -class T_MType_acc_pat1 - : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)), - (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>; - -class T_MType_acc_pat2 - : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))), - (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def : T_MType_acc_pat2 ; -def : T_MType_acc_pat1 ; - -def : T_MType_acc_pat1 ; -def : T_MType_acc_pat2 ; - 
//===----------------------------------------------------------------------===// // Template Class -- XType Vector Instructions //===----------------------------------------------------------------------===// @@ -3190,51 +2875,6 @@ def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>; def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>; def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>; -def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))), - (i64 (anyext (i32 IntRegs:$src2))))), - (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>; - -def: Pat<(i64 (mul (i64 (sext (i32 IntRegs:$src1))), - (i64 (sext (i32 IntRegs:$src2))))), - (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>; - -def: Pat<(i64 (mul (is_sext_i32:$src1), - (is_sext_i32:$src2))), - (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>; - -// Multiply and accumulate, use full result. -// Rxx[+-]=mpy(Rs,Rt) - -def: Pat<(i64 (add (i64 DoubleRegs:$src1), - (mul (i64 (sext (i32 IntRegs:$src2))), - (i64 (sext (i32 IntRegs:$src3)))))), - (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def: Pat<(i64 (sub (i64 DoubleRegs:$src1), - (mul (i64 (sext (i32 IntRegs:$src2))), - (i64 (sext (i32 IntRegs:$src3)))))), - (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def: Pat<(i64 (add (i64 DoubleRegs:$src1), - (mul (i64 (anyext (i32 IntRegs:$src2))), - (i64 (anyext (i32 IntRegs:$src3)))))), - (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def: Pat<(i64 (add (i64 DoubleRegs:$src1), - (mul (i64 (zext (i32 IntRegs:$src2))), - (i64 (zext (i32 IntRegs:$src3)))))), - (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def: Pat<(i64 (sub (i64 DoubleRegs:$src1), - (mul (i64 (anyext (i32 IntRegs:$src2))), - (i64 (anyext (i32 IntRegs:$src3)))))), - (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def: Pat<(i64 (sub (i64 DoubleRegs:$src1), - 
(mul (i64 (zext (i32 IntRegs:$src2))), - (i64 (zext (i32 IntRegs:$src3)))))), - (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - //===----------------------------------------------------------------------===// // MTYPE/MPYH - //===----------------------------------------------------------------------===// @@ -3376,16 +3016,6 @@ defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>; let accessSize = HalfWordAccess, isNVStorable = 0 in defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>; -class Storepi_pat - : Pat<(Store Value:$src1, I32:$src2, Offset:$offset), - (MI I32:$src2, imm:$offset, Value:$src1)>; - -def: Storepi_pat; -def: Storepi_pat; -def: Storepi_pat; -def: Storepi_pat; - //===----------------------------------------------------------------------===// // Template class for post increment stores with register offset. //===----------------------------------------------------------------------===// @@ -3536,116 +3166,6 @@ let addrMode = BaseImmOffset, InputType = "imm" in { u6_1Ext, 0b011, 1>; } -// Patterns for generating stores, where the address takes different forms: -// - frameindex, -// - frameindex + offset, -// - base + offset, -// - simple (base address without offset). -// These would usually be used together (via Storex_pat defined below), but -// in some cases one may want to apply different properties (such as -// AddedComplexity) to the individual patterns. 
-class Storex_fi_pat - : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; -multiclass Storex_fi_add_pat { - def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; - def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; -} -multiclass Storex_add_pat { - def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; - def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; -} -class Storex_simple_pat - : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), - (MI IntRegs:$Rs, 0, Value:$Rt)>; - -// Patterns for generating stores, where the address takes different forms, -// and where the value being stored is transformed through the value modifier -// ValueMod. The address forms are same as above. -class Storexm_fi_pat - : Pat<(Store Value:$Rs, AddrFI:$fi), - (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; -multiclass Storexm_fi_add_pat { - def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; - def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; -} -multiclass Storexm_add_pat { - def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; - def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; -} -class Storexm_simple_pat - : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), - (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>; - -multiclass Storex_pat { - def: Storex_fi_pat ; - defm: Storex_fi_add_pat ; - defm: Storex_add_pat ; -} - -multiclass Storexm_pat { - def: Storexm_fi_pat ; - defm: Storexm_fi_add_pat ; - defm: Storexm_add_pat ; -} - -// Regular stores in the DAG have two operands: value and address. 
-// Atomic stores also have two, but they are reversed: address, value. -// To use atomic stores with the patterns, they need to have their operands -// swapped. This relies on the knowledge that the F.Fragment uses names -// "ptr" and "val". -class SwapSt - : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, - F.OperandTransform>; - -let AddedComplexity = 20 in { - defm: Storex_pat; - defm: Storex_pat; - defm: Storex_pat; - defm: Storex_pat; - - defm: Storex_pat, I32, s32_0ImmPred, S2_storerb_io>; - defm: Storex_pat, I32, s31_1ImmPred, S2_storerh_io>; - defm: Storex_pat, I32, s30_2ImmPred, S2_storeri_io>; - defm: Storex_pat, I64, s29_3ImmPred, S2_storerd_io>; -} - -// Simple patterns should be tried with the least priority. -def: Storex_simple_pat; -def: Storex_simple_pat; -def: Storex_simple_pat; -def: Storex_simple_pat; - -def: Storex_simple_pat, I32, S2_storerb_io>; -def: Storex_simple_pat, I32, S2_storerh_io>; -def: Storex_simple_pat, I32, S2_storeri_io>; -def: Storex_simple_pat, I64, S2_storerd_io>; - -let AddedComplexity = 20 in { - defm: Storexm_pat; - defm: Storexm_pat; - defm: Storexm_pat; -} - -def: Storexm_simple_pat; -def: Storexm_simple_pat; -def: Storexm_simple_pat; - // Store predicate. 
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in @@ -3952,8 +3472,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in { // Sign extend word to doubleword def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>; -def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>; - // Vector saturate and pack let Defs = [USR_OVF] in { def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>; @@ -4002,17 +3520,6 @@ let Itinerary = S_2op_tc_2_SLOT23 in { def A2_negsat : T_S2op_1_ii <"neg", 0b10, 0b110, 1>; } -def: Pat<(i32 (select (i1 (setlt (i32 IntRegs:$src), 0)), - (i32 (sub 0, (i32 IntRegs:$src))), - (i32 IntRegs:$src))), - (A2_abs IntRegs:$src)>; - -let AddedComplexity = 50 in -def: Pat<(i32 (xor (add (sra (i32 IntRegs:$src), (i32 31)), - (i32 IntRegs:$src)), - (sra (i32 IntRegs:$src), (i32 31)))), - (A2_abs IntRegs:$src)>; - class T_S2op_2 RegTyBits, RegisterClass RCOut, RegisterClass RCIn, bits<3> MajOp, bits<3> MinOp, bit isSat, bit isRnd, list pattern = []> @@ -4050,9 +3557,7 @@ class T_S2op_2_ii MajOp, bits<3> MinOp, isSat, isRnd, pattern>; class T_S2op_shift MajOp, bits<3> MinOp, SDNode OpNd> - : T_S2op_2_ii ; + : T_S2op_2_ii ; // Vector arithmetic shift right by immediate with truncate and pack def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>; @@ -4081,11 +3586,6 @@ let isAsmParserOnly = 1 in def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src), "$dst = not($src)">; -def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5_0ImmPred:$src2)), - (i32 1))), - (i32 1))), - (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>; - class T_S2op_3MajOp, bits<3>minOp, bits<1> sat = 0> : SInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss), "$Rdd = "#opc#"($Rss)"#!if(!eq(sat, 1),":sat","")> { @@ -4125,9 +3625,6 @@ def A2_vabshsat : T_S2op_3 <"vabsh", 0b01, 0b101, 1>; def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>; def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>; -def : Pat<(not (i64 
DoubleRegs:$src1)), - (A2_notp DoubleRegs:$src1)>; - //===----------------------------------------------------------------------===// // STYPE/BIT + //===----------------------------------------------------------------------===// @@ -4167,20 +3664,6 @@ def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>; def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>; def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>; -// Count leading zeros. -def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>; -def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; - -// Count trailing zeros: 32-bit. -def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>; - -// Count leading ones. -def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>; -def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; - -// Count trailing ones: 32-bit. -def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>; - // The 64-bit counts leading/trailing are defined in HexagonInstrInfoV4.td. // Bit set/clear/toggle @@ -4223,19 +3706,6 @@ def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>; def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>; def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>; -def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5_0ImmPred:$u5)))), - (S2_clrbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, u5_0ImmPred:$u5))), - (S2_setbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, u5_0ImmPred:$u5))), - (S2_togglebit_i IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, (i32 IntRegs:$Rt))))), - (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))), - (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))), - (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>; - // Bit test let hasSideEffects = 0 in @@ -4274,17 +3744,6 @@ class T_TEST_BIT_REG def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>; 
def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>; -let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. - def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), - (S2_tstbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>; - def: Pat<(i1 (setne (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)), - (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(i1 (trunc (i32 IntRegs:$Rs))), - (S2_tstbit_i IntRegs:$Rs, 0)>; - def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))), - (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; -} - let hasSideEffects = 0 in class T_TEST_BITS_IMM MajOp, bit IsNeg> : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6_0Imm:$u6), @@ -4323,17 +3782,6 @@ def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>; def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>; def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>; -let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. - def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6_0ImmPred:$u6), 0)), - (C2_bitsclri IntRegs:$Rs, u6_0ImmPred:$u6)>; - def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 0)), - (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>; -} - -let AddedComplexity = 10 in // Complexity greater than compare reg-reg. 
-def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)), - (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>; - //===----------------------------------------------------------------------===// // STYPE/BIT - //===----------------------------------------------------------------------===// @@ -4349,14 +3797,6 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)), // XTYPE/PERM + //===----------------------------------------------------------------------===// -def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))), - (i32 8)), - (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))), - (i32 16)), - (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))), - (zextloadi8 (i32 IntRegs:$b))), - (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; - //===----------------------------------------------------------------------===// // XTYPE/PERM - //===----------------------------------------------------------------------===// @@ -4396,24 +3836,6 @@ let hasSideEffects = 0, isCodeGenOnly = 1 in def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src), "$dst = $src">; - -// Patterns for loads of i1: -def: Pat<(i1 (load AddrFI:$fi)), - (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; -def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32_0ImmPred:$Off))), - (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; -def: Pat<(i1 (load (i32 IntRegs:$Rs))), - (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; - -def I1toI32: OutPatFrag<(ops node:$Rs), - (C2_muxii (i1 $Rs), 1, 0)>; - -def I32toI1: OutPatFrag<(ops node:$Rs), - (i1 (C2_tfrrp (i32 $Rs)))>; - -defm: Storexm_pat; -def: Storexm_simple_pat; - //===----------------------------------------------------------------------===// // STYPE/PRED - //===----------------------------------------------------------------------===// @@ -4437,9 +3859,7 @@ class S_2OpInstImmMajOp, bits<3>MinOp, } class S_2OpInstImmI6MinOp> - : S_2OpInstImm { + : S_2OpInstImm { bits<6> src2; let Inst{13-8} = src2; } @@ -4453,9 +3873,7 
@@ def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>; let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in def S2_addasl_rrri: SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs, u3_0Imm:$u3), - "$Rd = addasl($Rt, $Rs, #$u3)" , - [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rt), - (shl (i32 IntRegs:$Rs), u3_0ImmPred:$u3)))], + "$Rd = addasl($Rt, $Rs, #$u3)" , [], "", S_3op_tc_2_SLOT23> { bits<5> Rd; bits<5> Rt; @@ -4497,12 +3915,8 @@ def S2_addasl_rrri: SInst <(outs IntRegs:$Rd), //===----------------------------------------------------------------------===// // SYSTEM/USER + //===----------------------------------------------------------------------===// -def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; - let hasSideEffects = 1, isSoloAX = 1 in -def Y2_barrier : SYSInst<(outs), (ins), - "barrier", - [(HexagonBARRIER)],"",ST_tc_st_SLOT0> { +def Y2_barrier : SYSInst<(outs), (ins), "barrier", [],"",ST_tc_st_SLOT0> { let Inst{31-28} = 0b1010; let Inst{27-21} = 0b1000000; } @@ -4524,9 +3938,6 @@ let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, (ins IntRegs:$Rs, IntRegs:$fi, s32_0Imm:$off), "">; } -def: Pat<(orisadd (i32 AddrFI:$Rs), s32_0ImmPred:$off), - (PS_fi (i32 AddrFI:$Rs), s32_0ImmPred:$off)>; - //===----------------------------------------------------------------------===// // CRUSER - Type. //===----------------------------------------------------------------------===// @@ -4734,14 +4145,6 @@ def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs), let Inst{20-16} = Rs; } -// Support for generating global address. -// Taken from X86InstrInfo.td. 
-def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, - SDTCisVT<1, i32>, - SDTCisPtrTy<0>]>; -def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; -def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; - // HI/LO Instructions let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in @@ -4768,52 +4171,26 @@ let isAsmParserOnly = 1 in { let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in { def CONST32 : CONSTLDInst<(outs IntRegs:$Rd), (ins i32imm:$v), - "$Rd = CONST32(#$v)", [(set I32:$Rd, imm:$v)]>; + "$Rd = CONST32(#$v)", []>; def CONST64 : CONSTLDInst<(outs DoubleRegs:$Rd), (ins i64imm:$v), - "$Rd = CONST64(#$v)", [(set I64:$Rd, imm:$v)]>; + "$Rd = CONST64(#$v)", []>; } -// Map TLS addressses to A2_tfrsi. -def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16_0Ext:$addr)>; -def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16_0Ext:$label)>; - let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, isCodeGenOnly = 1 in -def PS_true : SInst<(outs PredRegs:$dst), (ins), "", - [(set (i1 PredRegs:$dst), 1)]>; +def PS_true : SInst<(outs PredRegs:$dst), (ins), "", []>; let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, isCodeGenOnly = 1 in -def PS_false : SInst<(outs PredRegs:$dst), (ins), "", - [(set (i1 PredRegs:$dst), 0)]>; - -// Pseudo instructions. -def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; -def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>; - -def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; -def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; - -def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; - -// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain, -// Optional Flag and Variable Arguments. -// Its 1 Operand has pointer type. 
-def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def PS_false : SInst<(outs PredRegs:$dst), (ins), "", []>; let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - ".error \"should not emit\" ", - [(callseq_start timm:$amt)]>; + ".error \"should not emit\" ", []>; let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - ".error \"should not emit\" ", - [(callseq_end timm:$amt1, timm:$amt2)]>; + ".error \"should not emit\" ", []>; // Call subroutine indirectly. let Defs = VolatileV3.Regs in @@ -4829,264 +4206,15 @@ let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, isTerminator = 1, isCodeGenOnly = 1 in def PS_tailcall_i : JInst<(outs), (ins calltarget:$dst), "", []>; -//Tail calls. -def: Pat<(HexagonTCRet tglobaladdr:$dst), - (PS_tailcall_i tglobaladdr:$dst)>; -def: Pat<(HexagonTCRet texternalsym:$dst), - (PS_tailcall_i texternalsym:$dst)>; -def: Pat<(HexagonTCRet I32:$dst), - (PS_tailcall_r I32:$dst)>; - -// Map from r0 = and(r1, 65535) to r0 = zxth(r1) -def: Pat<(and (i32 IntRegs:$src1), 65535), - (A2_zxth IntRegs:$src1)>; - -// Map from r0 = and(r1, 255) to r0 = zxtb(r1). -def: Pat<(and (i32 IntRegs:$src1), 255), - (A2_zxtb IntRegs:$src1)>; - -// Map Add(p1, true) to p1 = not(p1). -// Add(p1, false) should never be produced, -// if it does, it got to be mapped to NOOP. -def: Pat<(add (i1 PredRegs:$src1), -1), - (C2_not PredRegs:$src1)>; - -// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). 
-def: Pat<(select (not (i1 PredRegs:$src1)), s8_0ImmPred:$src2, s32_0ImmPred:$src3), - (C2_muxii PredRegs:$src1, s32_0ImmPred:$src3, s8_0ImmPred:$src2)>; - -// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) -// => r0 = C2_muxir(p0, r1, #i) -def: Pat<(select (not (i1 PredRegs:$src1)), s32_0ImmPred:$src2, - (i32 IntRegs:$src3)), - (C2_muxir PredRegs:$src1, IntRegs:$src3, s32_0ImmPred:$src2)>; - -// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) -// => r0 = C2_muxri (p0, #i, r1) -def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32_0ImmPred:$src3), - (C2_muxri PredRegs:$src1, s32_0ImmPred:$src3, IntRegs:$src2)>; - -// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. -def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset), - (J2_jumpf PredRegs:$src1, bb:$offset)>; - -// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo). -def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)), - (A2_sxtw (LoReg DoubleRegs:$src1))>; - -// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)). -def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)), - (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>; - -// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)). -def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)), - (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; - -// We want to prevent emitting pnot's as much as possible. -// Map brcond with an unsupported setcc to a J2_jumpf. 
-def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - bb:$offset), - (J2_jumpf (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)), - bb:$offset)>; - -def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10_0ImmPred:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10_0ImmPred:$src2), bb:$offset)>; - -def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), - (J2_jumpf PredRegs:$src1, bb:$offset)>; - -def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), - (J2_jumpt PredRegs:$src1, bb:$offset)>; - -// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) -def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8_0ImmPred:$src2)), bb:$offset), - (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8_0ImmPred:$src2)), - bb:$offset)>; - -// Map from a 64-bit select to an emulated 64-bit mux. -// Hexagon does not support 64-bit MUXes; so emulate with combines. -def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), - (i64 DoubleRegs:$src3)), - (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2), - (HiReg DoubleRegs:$src3)), - (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2), - (LoReg DoubleRegs:$src3)))>; - -// Map from a 1-bit select to logical ops. -// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3). -def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)), - (C2_or (C2_and PredRegs:$src1, PredRegs:$src2), - (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>; - -// Map for truncating from 64 immediates to 32 bit immediates. -def: Pat<(i32 (trunc (i64 DoubleRegs:$src))), - (LoReg DoubleRegs:$src)>; - -// Map for truncating from i64 immediates to i1 bit immediates. -def: Pat<(i1 (trunc (i64 DoubleRegs:$src))), - (C2_tfrrp (LoReg DoubleRegs:$src))>; - -// rs <= rt -> !(rs > rt). 
-let AddedComplexity = 30 in -def: Pat<(i1 (setle (i32 IntRegs:$src1), s32_0ImmPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2))>; - -// rs <= rt -> !(rs > rt). -def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; - -// Rss <= Rtt -> !(Rss > Rtt). -def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Map cmpne -> cmpeq. -// Hexagon_TODO: We should improve on this. -// rs != rt -> !(rs == rt). -let AddedComplexity = 30 in -def: Pat<(i1 (setne (i32 IntRegs:$src1), s32_0ImmPred:$src2)), - (C2_not (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2))>; - -// Convert setne back to xor for hexagon since we compute w/ pred registers. -def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))), - (C2_xor PredRegs:$src1, PredRegs:$src2)>; - -// Map cmpne(Rss) -> !cmpew(Rss). -// rs != rt -> !(rs == rt). -def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Map cmpge(Rs, Rt) -> !(cmpgt(Rs, Rt). -// rs >= rt -> !(rt > rs). -def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), - (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>; - -// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) -let AddedComplexity = 30 in -def: Pat<(i1 (setge (i32 IntRegs:$src1), s32_0ImmPred:$src2)), - (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32_0ImmPred:$src2))>; - -// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). -// rss >= rtt -> !(rtt > rss). -def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>; - -// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). -// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1). -// rs < rt -> !(rs >= rt). 
-let AddedComplexity = 30 in -def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32_0ImmPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, - (DEC_CONST_SIGNED s32_0ImmPred:$src2)))>; - -// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs) -def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)), - (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>; - -// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1) -def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32_0ImmPred:$src2)), - (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32_0ImmPred:$src2))>; - -// Generate cmpgtu(Rs, #u9) -def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32_0ImmPred:$src2)), - (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>; - -// Map from Rs >= Rt -> !(Rt > Rs). -// rs >= rt -> !(rt > rs). -def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>; - -// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1). -// Map from (Rs <= Rt) -> !(Rs > Rt). -def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), - (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Sign extends. -// i1 -> i32 -def: Pat<(i32 (sext (i1 PredRegs:$src1))), - (C2_muxii PredRegs:$src1, -1, 0)>; - -// i1 -> i64 -def: Pat<(i64 (sext (i1 PredRegs:$src1))), - (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>; - -// Zero extends. -// i1 -> i32 -def: Pat<(i32 (zext (i1 PredRegs:$src1))), - (C2_muxii PredRegs:$src1, 1, 0)>; - -// Map from Rs = Pd to Pd = mux(Pd, #1, #0) -def: Pat<(i32 (anyext (i1 PredRegs:$src1))), - (C2_muxii PredRegs:$src1, 1, 0)>; - -// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0)) -def: Pat<(i64 (anyext (i1 PredRegs:$src1))), - (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>; - -// Clear the sign bit in a 64-bit register. 
-def ClearSign : OutPatFrag<(ops node:$Rss), - (A2_combinew (S2_clrbit_i (HiReg $Rss), 31), (LoReg $Rss))>; - -def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt), - (A2_addp - (M2_dpmpyuu_acc_s0 - (S2_lsr_i_p - (A2_addp - (M2_dpmpyuu_acc_s0 - (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32), - (HiReg $Rss), - (LoReg $Rtt)), - (A2_combinew (A2_tfrsi 0), - (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))), - 32), - (HiReg $Rss), - (HiReg $Rtt)), - (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>; - -// Multiply 64-bit unsigned and use upper result. -def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>; - -// Multiply 64-bit signed and use upper result. -// -// For two signed 64-bit integers A and B, let A' and B' denote A and B -// with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the -// sign bit of A (and identically for B). With this notation, the signed -// product A*B can be written as: -// AB = (-2^63 s(A) + A') * (-2^63 s(B) + B') -// = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B' -// = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A'] -// = (unsigned product AB) - 2^64 [s(A)B'+s(B)A'] - -def : Pat <(mulhs I64:$Rss, I64:$Rtt), - (A2_subp - (MulHU $Rss, $Rtt), - (A2_addp - (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)), - (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>; - -// Hexagon specific ISD nodes. -def SDTHexagonALLOCA : SDTypeProfile<1, 2, - [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, - [SDNPHasChain]>; - // The reason for the custom inserter is to record all ALLOCA instructions // in MachineFunctionInfo. 
let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1 in def PS_alloca: ALU32Inst<(outs IntRegs:$Rd), - (ins IntRegs:$Rs, u32_0Imm:$A), "", - [(set (i32 IntRegs:$Rd), - (HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)))]>; + (ins IntRegs:$Rs, u32_0Imm:$A), "", []>; let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in def PS_aligna : ALU32Inst<(outs IntRegs:$Rd), (ins u32_0Imm:$A), "", []>; -def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; -def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; - -def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi s16_0Ext:$dst)>; -def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi s16_0Ext:$dst)>; - // XTYPE/SHIFT // //===----------------------------------------------------------------------===// @@ -5103,10 +4231,7 @@ class T_shift_imm_acc_r majOp, bits<2> minOp> : SInst_acc<(outs IntRegs:$Rx), (ins IntRegs:$src1, IntRegs:$Rs, u5_0Imm:$u5), - "$Rx "#opc2#opc1#"($Rs, #$u5)", - [(set (i32 IntRegs:$Rx), - (OpNode2 (i32 IntRegs:$src1), - (OpNode1 (i32 IntRegs:$Rs), u5_0ImmPred:$u5)))], + "$Rx "#opc2#opc1#"($Rs, #$u5)", [], "$src1 = $Rx", S_2op_tc_2_SLOT23> { bits<5> Rx; bits<5> Rs; @@ -5133,10 +4258,7 @@ class T_shift_reg_acc_r majOp, bits<2> minOp> : SInst_acc<(outs IntRegs:$Rx), (ins IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt), - "$Rx "#opc2#opc1#"($Rs, $Rt)", - [(set (i32 IntRegs:$Rx), - (OpNode2 (i32 IntRegs:$src1), - (OpNode1 (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))], + "$Rx "#opc2#opc1#"($Rs, $Rt)", [], "$src1 = $Rx", S_3op_tc_2_SLOT23 > { bits<5> Rx; bits<5> Rs; @@ -5160,10 +4282,7 @@ class T_shift_imm_acc_p majOp, bits<2> minOp> : SInst_acc<(outs DoubleRegs:$Rxx), (ins DoubleRegs:$src1, DoubleRegs:$Rss, u6_0Imm:$u6), - "$Rxx "#opc2#opc1#"($Rss, #$u6)", - [(set (i64 DoubleRegs:$Rxx), - (OpNode2 (i64 DoubleRegs:$src1), - (OpNode1 (i64 DoubleRegs:$Rss), u6_0ImmPred:$u6)))], + "$Rxx "#opc2#opc1#"($Rss, #$u6)", [], "$src1 = $Rxx", S_2op_tc_2_SLOT23> { bits<5> Rxx; bits<5> Rss; @@ -5190,10 +4309,7 @@ 
class T_shift_reg_acc_p majOp, bits<2> minOp> : SInst_acc<(outs DoubleRegs:$Rxx), (ins DoubleRegs:$src1, DoubleRegs:$Rss, IntRegs:$Rt), - "$Rxx "#opc2#opc1#"($Rss, $Rt)", - [(set (i64 DoubleRegs:$Rxx), - (OpNode2 (i64 DoubleRegs:$src1), - (OpNode1 (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt))))], + "$Rxx "#opc2#opc1#"($Rss, $Rt)", [], "$src1 = $Rxx", S_3op_tc_2_SLOT23> { bits<5> Rxx; bits<5> Rss; @@ -5365,9 +4481,7 @@ class T_S3op_3 MajOp, let hasNewValue = 1 in class T_S3op_shift32 MinOp> - : T_S3op_3 ; + : T_S3op_3 ; let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in class T_S3op_shift32_Sat MinOp> @@ -5375,9 +4489,7 @@ class T_S3op_shift32_Sat MinOp> class T_S3op_shift64 MinOp> - : T_S3op_3 ; + : T_S3op_3 ; class T_S3op_shiftVect MajOp, bits<2> MinOp> @@ -5523,35 +4635,6 @@ def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>; def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6_0Imm>; -def SDTHexagonINSERT: - SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; -def SDTHexagonINSERTRP: - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisVT<3, i64>]>; - -def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; -def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; - -def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), - (S2_insert I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2)>; -def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), - (S2_insertp I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2)>; -def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), - (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; -def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), - (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; - -let AddedComplexity = 100 in -def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))), - (i32 (extloadi8 (add I32:$b, 3))), - 24, 8), - (i32 16)), - (shl (i32 (zextloadi8 
(add I32:$b, 1))), (i32 8))), - (zextloadi8 I32:$b)), - (A2_swiz (L2_loadri_io I32:$b, 0))>; - - //===----------------------------------------------------------------------===// // Template class for 'extract bitfield' instructions //===----------------------------------------------------------------------===// @@ -5618,29 +4701,6 @@ let hasNewValue = 1 in { def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5_0Imm>; } -def SDTHexagonEXTRACTU: - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; -def SDTHexagonEXTRACTURP: - SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, i64>]>; - -def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; -def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; - -def: Pat<(HexagonEXTRACTU I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3), - (S2_extractu I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>; -def: Pat<(HexagonEXTRACTU I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3), - (S2_extractup I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>; -def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2), - (S2_extractu_rp I32:$src1, I64:$src2)>; -def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2), - (S2_extractup_rp I64:$src1, I64:$src2)>; - -// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) -def: Pat<(mul (i32 IntRegs:$src1), (ineg n8_0ImmPred:$src2)), - (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>; - //===----------------------------------------------------------------------===// // :raw for of tableindx[bdhw] insns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td b/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td index 8b8318b..225f944 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -11,12 +11,6 @@ // 
//===----------------------------------------------------------------------===// -def callv3 : SDNode<"HexagonISD::CALL", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; - -def callv3nr : SDNode<"HexagonISD::CALLnr", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; - //===----------------------------------------------------------------------===// // J + //===----------------------------------------------------------------------===// @@ -107,9 +101,7 @@ def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>; let hasSideEffects = 0, isAsmParserOnly = 1 in def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd), - (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)", - [(set (i64 DoubleRegs:$Rd), (i64 (add (i64 (sext (i32 IntRegs:$Rs))), - (i64 DoubleRegs:$Rt))))], + (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)", [], "", ALU64_tc_1_SLOT23>; @@ -139,60 +131,10 @@ def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>; def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>; def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>; -multiclass MinMax_pats_p { - defm: T_MinMax_pats; -} - -let AddedComplexity = 200 in { - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; -} - //===----------------------------------------------------------------------===// // ALU64/ALU - //===----------------------------------------------------------------------===// - - - -//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>; - -//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>; - -//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>; - -//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset), -// (JMP_RegGezt (i32 IntRegs:$src1), 
bb:$offset)>; - -//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset), -// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>; - -// Map call instruction -def : Pat<(callv3 I32:$dst), - (J2_callr I32:$dst)>; -def : Pat<(callv3 tglobaladdr:$dst), - (J2_call tglobaladdr:$dst)>; -def : Pat<(callv3 texternalsym:$dst), - (J2_call texternalsym:$dst)>; -def : Pat<(callv3 tglobaltlsaddr:$dst), - (J2_call tglobaltlsaddr:$dst)>; - -def : Pat<(callv3nr I32:$dst), - (PS_callr_nr I32:$dst)>; -def : Pat<(callv3nr tglobaladdr:$dst), - (PS_call_nr tglobaladdr:$dst)>; -def : Pat<(callv3nr texternalsym:$dst), - (PS_call_nr texternalsym:$dst)>; - //===----------------------------------------------------------------------===// // :raw form of vrcmpys:hi/lo insns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td index 0880d5e..5369f5f 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -30,9 +30,6 @@ def DuplexIClassD: InstDuplex < 0xD >; def DuplexIClassE: InstDuplex < 0xE >; def DuplexIClassF: InstDuplex < 0xF >; -def addrga: PatLeaf<(i32 AddrGA:$Addr)>; -def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; - let hasSideEffects = 0 in class T_Immext : EXTENDERInst<(outs), (ins ImmType:$imm), @@ -53,14 +50,6 @@ let isCodeGenOnly = 1 in { def A4_ext_g : T_Immext; } -def BITPOS32 : SDNodeXFormgetSExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - - // Hexagon V4 Architecture spec defines 8 instruction classes: // LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the // compiler) @@ -145,24 +134,6 @@ def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>; def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>; def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>; -// Pats for instruction selection. 
- -// A class to embed the usual comparison patfrags within a zext to i32. -// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same -// names, or else the frag's "body" won't match the operands. -class CmpInReg - : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>; - -def: T_cmp32_rr_pat, i32>; -def: T_cmp32_rr_pat, i32>; - -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat; - -def: T_cmp32_rr_pat, i1>; -def: T_cmp32_rr_pat, i1>; - class T_CMP_rrbh MinOp, bit IsComm> : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt), "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>, @@ -192,21 +163,6 @@ def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>; def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>; def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>; -let AddedComplexity = 100 in { - def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), - 255), 0)), - (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), - 255), 0)), - (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; - def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), - 65535), 0)), - (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), - 65535), 0)), - (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; -} - class T_CMP_ribh MajOp, bit IsHalf, bit IsComm, Operand ImmType, bit IsImmExt, bit IsImmSigned, int ImmBits> : ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm), @@ -272,16 +228,6 @@ class T_RCMP_EQ_ri def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>; def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>; -def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32_0ImmPred:$s8)))), - (A4_rcmpeqi IntRegs:$Rs, s32_0ImmPred:$s8)>; -def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32_0ImmPred:$s8)))), - (A4_rcmpneqi IntRegs:$Rs, s32_0ImmPred:$s8)>; - -// Preserve the S2_tstbit_r generation -def: Pat<(i32 (zext (i1 (setne (i32 
(and (i32 (shl 1, (i32 IntRegs:$src2))), - (i32 IntRegs:$src1))), 0)))), - (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>; - //===----------------------------------------------------------------------===// // ALU32 - //===----------------------------------------------------------------------===// @@ -317,16 +263,6 @@ let opExtendable = 1 in def A4_combineir : T_Combine1<0b01, (ins s8_0Ext:$s8, IntRegs:$Rs), "$Rdd = combine(#$s8, $Rs)">; -// The complexity of the combines involving immediates should be greater -// than the complexity of the combine with two registers. -let AddedComplexity = 50 in { -def: Pat<(HexagonCOMBINE IntRegs:$r, s32_0ImmPred:$i), - (A4_combineri IntRegs:$r, s32_0ImmPred:$i)>; - -def: Pat<(HexagonCOMBINE s32_0ImmPred:$i, IntRegs:$r), - (A4_combineir s32_0ImmPred:$i, IntRegs:$r)>; -} - // A4_combineii: Set two small immediates. let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8_0Imm:$s8, u6_0Ext:$U6), @@ -343,12 +279,6 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8_0Imm:$s8, u6_0Ext:$U let Inst{4-0} = Rdd; } -// The complexity of the combine with two immediates should be greater than -// the complexity of a combine involving a register. 
-let AddedComplexity = 75 in -def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, u32_0ImmPred:$u6), - (A4_combineii imm:$s8, imm:$u6)>; - //===----------------------------------------------------------------------===// // ALU32/PERM - //===----------------------------------------------------------------------===// @@ -357,39 +287,6 @@ def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, u32_0ImmPred:$u6), // LD + //===----------------------------------------------------------------------===// -def Zext64: OutPatFrag<(ops node:$Rs), - (i64 (A4_combineir 0, (i32 $Rs)))>; -def Sext64: OutPatFrag<(ops node:$Rs), - (i64 (A2_sxtw (i32 $Rs)))>; - -// Patterns to generate indexed loads with different forms of the address: -// - frameindex, -// - base + offset, -// - base (without offset). -multiclass Loadxm_pat { - def: Pat<(VT (Load AddrFI:$fi)), - (VT (ValueMod (MI AddrFI:$fi, 0)))>; - def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))), - (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; - def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))), - (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; - def: Pat<(VT (Load (i32 IntRegs:$Rs))), - (VT (ValueMod (MI IntRegs:$Rs, 0)))>; -} - -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; - -// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs). -def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>; - //===----------------------------------------------------------------------===// // Template class for load instructions with Absolute set addressing mode. 
//===----------------------------------------------------------------------===// @@ -497,42 +394,6 @@ let accessSize = DoubleWordAccess in def L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>; -multiclass T_LoadAbsReg_Pat { - def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3)))), - (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3)>; - def : Pat <(VT (ldOp (add IntRegs:$src1, - (HexagonCONST32 tglobaladdr:$src2)))), - (MI IntRegs:$src1, 0, tglobaladdr:$src2)>; - - def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), - (HexagonCONST32 tconstpool:$src3)))), - (MI IntRegs:$src1, u2_0ImmPred:$src2, tconstpool:$src3)>; - def : Pat <(VT (ldOp (add IntRegs:$src1, - (HexagonCONST32 tconstpool:$src2)))), - (MI IntRegs:$src1, 0, tconstpool:$src2)>; - - def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), - (HexagonCONST32 tjumptable:$src3)))), - (MI IntRegs:$src1, u2_0ImmPred:$src2, tjumptable:$src3)>; - def : Pat <(VT (ldOp (add IntRegs:$src1, - (HexagonCONST32 tjumptable:$src2)))), - (MI IntRegs:$src1, 0, tjumptable:$src2)>; -} - -let AddedComplexity = 60 in { -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; - -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; - -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; -} - //===----------------------------------------------------------------------===// // Template classes for the non-predicated load instructions with // base + register offset addressing mode @@ -630,50 +491,6 @@ defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>; let accessSize = DoubleWordAccess in defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>; -// 'def pats' for load instructions with base + register offset and non-zero -// immediate value. Immediate value is used to left-shift the second -// register operand. 
-class Loadxs_pat - : Pat<(VT (Load (add (i32 IntRegs:$Rs), - (i32 (shl (i32 IntRegs:$Rt), u2_0ImmPred:$u2))))), - (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; - -let AddedComplexity = 40 in { - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; -} - -// 'def pats' for load instruction base + register offset and -// zero immediate value. -class Loadxs_simple_pat - : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))), - (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; - -let AddedComplexity = 20 in { - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; -} - -// zext i1->i64 -def: Pat<(i64 (zext (i1 PredRegs:$src1))), - (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>; - -// zext i32->i64 -def: Pat<(i64 (zext (i32 IntRegs:$src1))), - (Zext64 IntRegs:$src1)>; - //===----------------------------------------------------------------------===// // LD - //===----------------------------------------------------------------------===// @@ -796,29 +613,6 @@ def S4_storeri_ur : T_StoreAbsReg <"memw", "STriw", IntRegs, 0b100, WordAccess>; def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110, DoubleWordAccess>; -let AddedComplexity = 40 in -multiclass T_StoreAbsReg_Pats { - def : Pat<(stOp (VT RC:$src4), - (add (shl (i32 IntRegs:$src1), u2_0ImmPred:$src2), - u32_0ImmPred:$src3)), - (MI IntRegs:$src1, u2_0ImmPred:$src2, u32_0ImmPred:$src3, RC:$src4)>; - - def : Pat<(stOp (VT RC:$src4), - (add (shl IntRegs:$src1, u2_0ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3))), - (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; - - def : Pat<(stOp (VT RC:$src4), - (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))), - (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; -} - -defm : T_StoreAbsReg_Pats ; 
-defm : T_StoreAbsReg_Pats ; -defm : T_StoreAbsReg_Pats ; -defm : T_StoreAbsReg_Pats ; - let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset, opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in class T_StoreAbsRegNV MajOp, @@ -1037,54 +831,6 @@ let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0 in { defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>; } -class Storexs_pat - : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs), - (i32 (shl (i32 IntRegs:$Rt), u2_0ImmPred:$u2)))), - (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; - -let AddedComplexity = 40 in { - def: Storexs_pat; - def: Storexs_pat; - def: Storexs_pat; - def: Storexs_pat; -} - -def s30_2ProperPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<30,2>(v) && !isShiftedInt<29,3>(v); -}]>; -def RoundTo8 : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(Imm & -8, SDLoc(N), MVT::i32); -}]>; - -let AddedComplexity = 40 in -def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)), - (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>; - -class Store_rr_pat - : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)), - (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>; - -let AddedComplexity = 20 in { - def: Store_rr_pat; - def: Store_rr_pat; - def: Store_rr_pat; - def: Store_rr_pat; -} - - -// memd(Rx++#s4:3)=Rtt -// memd(Rx++#s4:3:circ(Mu))=Rtt -// memd(Rx++I:circ(Mu))=Rtt -// memd(Rx++Mu)=Rtt -// memd(Rx++Mu:brev)=Rtt -// memd(gp+#u16:3)=Rtt - -// Store doubleword conditionally. -// if ([!]Pv[.new]) memd(#u6)=Rtt -// TODO: needs to be implemented. 
- //===----------------------------------------------------------------------===// // Template class //===----------------------------------------------------------------------===// @@ -1188,126 +934,6 @@ let hasSideEffects = 0, addrMode = BaseImmOffset, defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>; } -def IMM_BYTE : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); -}]>; - -def IMM_HALF : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); -}]>; - -def IMM_WORD : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); -}]>; - -def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>; -def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>; -def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>; - -// Emit store-immediate, but only when the stored value will not be constant- -// extended. The reason for that is that there is no pass that can optimize -// constant extenders in store-immediate instructions. In some cases we can -// end up will a number of such stores, all of which store the same extended -// value (e.g. after unrolling a loop that initializes floating point array). - -// Predicates to determine if the 16-bit immediate is expressible as a sign- -// extended 8-bit immediate. Store-immediate-halfword will ignore any bits -// beyond 0..15, so we don't care what is in there. - -def i16in8ImmPred: PatLeaf<(i32 imm), [{ - int64_t v = (int16_t)N->getSExtValue(); - return v == (int64_t)(int8_t)v; -}]>; - -// Predicates to determine if the 32-bit immediate is expressible as a sign- -// extended 8-bit immediate. -def i32in8ImmPred: PatLeaf<(i32 imm), [{ - int64_t v = (int32_t)N->getSExtValue(); - return v == (int64_t)(int8_t)v; -}]>; - - -let AddedComplexity = 40 in { - // Even though the offset is not extendable in the store-immediate, we - // can still generate the fi# in the base address. 
If the final offset - // is not valid for the instruction, we will replace it with a scratch - // register. -// def: Storexm_fi_pat ; -// def: Storexm_fi_pat ; -// def: Storexm_fi_pat ; - -// defm: Storexm_fi_add_pat ; -// defm: Storexm_fi_add_pat ; -// defm: Storexm_fi_add_pat ; - - defm: Storexm_add_pat; - defm: Storexm_add_pat; - defm: Storexm_add_pat; -} - -def: Storexm_simple_pat; -def: Storexm_simple_pat; -def: Storexm_simple_pat; - -// memb(Rx++#s4:0:circ(Mu))=Rt -// memb(Rx++I:circ(Mu))=Rt -// memb(Rx++Mu)=Rt -// memb(Rx++Mu:brev)=Rt -// memb(gp+#u16:0)=Rt - -// Store halfword. -// TODO: needs to be implemented -// memh(Re=#U6)=Rt.H -// memh(Rs+#s11:1)=Rt.H -// memh(Rs+Ru<<#u2)=Rt.H -// TODO: needs to be implemented. - -// memh(Ru<<#u2+#U6)=Rt.H -// memh(Rx++#s4:1:circ(Mu))=Rt.H -// memh(Rx++#s4:1:circ(Mu))=Rt -// memh(Rx++I:circ(Mu))=Rt.H -// memh(Rx++I:circ(Mu))=Rt -// memh(Rx++Mu)=Rt.H -// memh(Rx++Mu)=Rt -// memh(Rx++Mu:brev)=Rt.H -// memh(Rx++Mu:brev)=Rt -// memh(gp+#u16:1)=Rt -// if ([!]Pv[.new]) memh(#u6)=Rt.H -// if ([!]Pv[.new]) memh(#u6)=Rt - -// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H -// TODO: needs to be implemented. - -// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H -// TODO: Needs to be implemented. - -// Store word. -// memw(Re=#U6)=Rt -// TODO: Needs to be implemented. 
-// memw(Rx++#s4:2)=Rt -// memw(Rx++#s4:2:circ(Mu))=Rt -// memw(Rx++I:circ(Mu))=Rt -// memw(Rx++Mu)=Rt -// memw(Rx++Mu:brev)=Rt - //===----------------------------------------------------------------------=== // ST - //===----------------------------------------------------------------------=== @@ -1875,48 +1501,6 @@ def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>; def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>; def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>; -// op(Ps, op(Pt, Pu)) -class LogLog_pat - : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))), - (MI I1:$Ps, I1:$Pt, I1:$Pu)>; - -// op(Ps, op(Pt, ~Pu)) -class LogLogNot_pat - : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))), - (MI I1:$Ps, I1:$Pt, I1:$Pu)>; - -def: LogLog_pat; -def: LogLog_pat; -def: LogLog_pat; -def: LogLog_pat; - -def: LogLogNot_pat; -def: LogLogNot_pat; -def: LogLogNot_pat; -def: LogLogNot_pat; - -//===----------------------------------------------------------------------===// -// PIC: Support for PIC compilations. 
The patterns and SD nodes defined -// below are needed to support code generation for PIC -//===----------------------------------------------------------------------===// - -def SDT_HexagonAtGot - : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; -def SDT_HexagonAtPcrel - : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; - -// AT_GOT address-of-GOT, address-of-global, offset-in-global -def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; -// AT_PCREL address-of-global -def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; - -def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), - (L2_loadri_io I32:$got, imm:$addr)>; -def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), - (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; -def: Pat<(HexagonAtPcrel I32:$addr), - (C4_addipc imm:$addr)>; - //===----------------------------------------------------------------------===// // CR - //===----------------------------------------------------------------------===// @@ -1929,11 +1513,6 @@ def: Pat<(HexagonAtPcrel I32:$addr), def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>; def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>; -def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), - (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>; -def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), - (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>; - let hasNewValue = 1, hasSideEffects = 0 in def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt), "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> { @@ -1954,9 +1533,7 @@ let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 3 in def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Ru, s6_0Ext:$s6), - "$Rd = add($Rs, add($Ru, #$s6))" , - [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), - (add (i32 IntRegs:$Ru), s32_0ImmPred:$s6)))], + 
"$Rd = add($Rs, add($Ru, #$s6))" , [], "", ALU64_tc_2_SLOT23> { bits<5> Rd; bits<5> Rs; @@ -1996,34 +1573,6 @@ def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd), let Inst{4-0} = Ru; } -// Rd=add(Rs,sub(#s6,Ru)) -def: Pat<(add (i32 IntRegs:$src1), (sub s32_0ImmPred:$src2, - (i32 IntRegs:$src3))), - (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; - -// Rd=sub(add(Rs,#s6),Ru) -def: Pat<(sub (add (i32 IntRegs:$src1), s32_0ImmPred:$src2), - (i32 IntRegs:$src3)), - (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; - -// Rd=add(sub(Rs,Ru),#s6) -def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)), - (s32_0ImmPred:$src2)), - (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; - - -// Add or subtract doublewords with carry. -//TODO: -// Rdd=add(Rss,Rtt,Px):carry -//TODO: -// Rdd=sub(Rss,Rtt,Px):carry - -// Extract bitfield -// Rdd=extract(Rss,#u6,#U6) -// Rdd=extract(Rss,Rtt) -// Rd=extract(Rs,Rtt) -// Rd=extract(Rs,#u5,#U5) - def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>; def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6_0Imm>; @@ -2056,10 +1605,7 @@ let hasSideEffects = 0 in def M4_xor_xacc : SInst <(outs DoubleRegs:$Rxx), (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt), - "$Rxx ^= xor($Rss, $Rtt)", - [(set (i64 DoubleRegs:$Rxx), - (xor (i64 DoubleRegs:$dst2), (xor (i64 DoubleRegs:$Rss), - (i64 DoubleRegs:$Rtt))))], + "$Rxx ^= xor($Rss, $Rtt)", [], "$dst2 = $Rxx", S_3op_tc_1_SLOT23> { bits<5> Rxx; bits<5> Rss; @@ -2160,9 +1706,7 @@ let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10, def S4_or_andix: ALU64Inst<(outs IntRegs:$Rx), (ins IntRegs:$Ru, IntRegs:$_src_, s10_0Ext:$s10), - "$Rx = or($Ru, and($_src_, #$s10))" , - [(set (i32 IntRegs:$Rx), - (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32_0ImmPred:$s10)))] , + "$Rx = or($Ru, and($_src_, #$s10))" , [] , "$_src_ = $Rx", ALU64_tc_2_SLOT23> { bits<5> Rx; bits<5> Ru; @@ -2281,33 +1825,13 @@ def M4_xor_andn 
: T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>; def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>; def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; - -class T_MType_acc_pat3 - : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, - (not IntRegs:$src3)))), - (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>; - -def: T_MType_acc_pat3 ; -def: T_MType_acc_pat3 ; -def: T_MType_acc_pat3 ; - // Compound or-or and or-and let isExtentSigned = 1, InputType = "imm", hasNewValue = 1, isExtendable = 1, opExtentBits = 10, opExtendable = 3 in class T_CompOR MajOp, SDNode OpNode> : MInst_acc <(outs IntRegs:$Rx), (ins IntRegs:$src1, IntRegs:$Rs, s10_0Ext:$s10), - "$Rx |= "#mnemonic#"($Rs, #$s10)", - [(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1), - (OpNode (i32 IntRegs:$Rs), s32_0ImmPred:$s10)))], + "$Rx |= "#mnemonic#"($Rs, #$s10)", [], "$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel { bits<5> Rx; bits<5> Rs; @@ -2378,19 +1902,6 @@ def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>; def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>; def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>; -// Count trailing zeros: 64-bit. -def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; - -// Count trailing ones: 64-bit. -def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; - -// Define leading/trailing patterns that require zero-extensions to 64 bits. 
-def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>; -def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>; -def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>; -def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>; - - let hasSideEffects = 0, hasNewValue = 1 in def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6_0Imm:$s6), "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> { @@ -2426,41 +1937,10 @@ def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6_0Imm:$s6), def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>; def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>; -let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. - def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), - (S4_ntstbit_i (i32 IntRegs:$Rs), u5_0ImmPred:$u5)>; - def: Pat<(i1 (seteq (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)), - (S4_ntstbit_r (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))>; -} - -// Add extra complexity to prefer these instructions over bitsset/bitsclr. -// The reason is that tstbit/ntstbit can be folded into a compound instruction: -// if ([!]tstbit(...)) jump ... -let AddedComplexity = 100 in -def: Pat<(i1 (setne (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), - (S2_tstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; - -let AddedComplexity = 100 in -def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), - (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; - def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>; def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>; def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>; -// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be -// represented as a compare against "value & 0xFF", which is an exact match -// for cmpb (same for cmph). 
The patterns below do not contain any additional -// complexity that would make them preferable, and if they were actually used -// instead of cmpb/cmph, they would result in a compare against register that -// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF). -def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)), - (C4_nbitsclri I32:$Rs, u6_0ImmPred:$u6)>; -def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), - (C4_nbitsclr I32:$Rs, I32:$Rt)>; -def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), - (C4_nbitsset I32:$Rs, I32:$Rt)>; - //===----------------------------------------------------------------------===// // XTYPE/BIT - //===----------------------------------------------------------------------===// @@ -2474,10 +1954,7 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in def M4_mpyri_addi : MInst<(outs IntRegs:$Rd), (ins u6_0Ext:$u6, IntRegs:$Rs, u6_0Imm:$U6), - "$Rd = add(#$u6, mpyi($Rs, #$U6))" , - [(set (i32 IntRegs:$Rd), - (add (mul (i32 IntRegs:$Rs), u6_0ImmPred:$U6), - u32_0ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> { + "$Rd = add(#$u6, mpyi($Rs, #$U6))" , [],"",ALU64_tc_3x_SLOT23> { bits<5> Rd; bits<6> u6; bits<5> Rs; @@ -2500,10 +1977,7 @@ let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd), (ins u6_0Ext:$u6, IntRegs:$Rs, IntRegs:$Rt), - "$Rd = add(#$u6, mpyi($Rs, $Rt))" , - [(set (i32 IntRegs:$Rd), - (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32_0ImmPred:$u6))], - "", ALU64_tc_3x_SLOT23>, ImmRegRel { + "$Rd = add(#$u6, mpyi($Rs, $Rt))" , [], "", ALU64_tc_3x_SLOT23>, ImmRegRel { bits<5> Rd; bits<6> u6; bits<5> Rs; @@ -2524,9 +1998,7 @@ let hasNewValue = 1 in class T_AddMpy : ALU64Inst <(outs IntRegs:$dst), ins, "$dst = add($src1, mpyi("#!if(MajOp,"$src3, #$src2))", - "#$src2, $src3))"), - [(set (i32 IntRegs:$dst), - (add (i32 IntRegs:$src1), (mul 
(i32 IntRegs:$src3), ImmPred:$src2)))], + "#$src2, $src3))"), [], "", ALU64_tc_3x_SLOT23> { bits<5> dst; bits<5> src1; @@ -2559,9 +2031,7 @@ def M4_mpyri_addr : T_AddMpy<0b1, u32_0ImmPred, let CextOpcode = "ADD_MPY", InputType = "reg", hasNewValue = 1 in def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx), (ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs), - "$Rx = add($Ru, mpyi($_src_, $Rs))", - [(set (i32 IntRegs:$Rx), (add (i32 IntRegs:$Ru), - (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))))], + "$Rx = add($Ru, mpyi($_src_, $Rs))", [], "$_src_ = $Rx", M_tc_3x_SLOT23>, ImmRegRel { bits<5> Rx; bits<5> Ru; @@ -2652,7 +2122,6 @@ class T_vcmpImm cmpOp, bits<2> minOp, Operand ImmOprnd> // Vector compare bytes def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>; -def: T_vcmp_pat; let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.gt", 0b1000>; @@ -2714,40 +2183,6 @@ defm S4_andi : T_ShiftOperate<"and", 0b00, ALU64_tc_2_SLOT23>; defm S4_ori : T_ShiftOperate<"or", 0b01, ALU64_tc_1_SLOT23>; defm S4_subi : T_ShiftOperate<"sub", 0b11, ALU64_tc_1_SLOT23>; -class T_Shift_CommOp_pat - : Pat<(Op (ShOp IntRegs:$Rx, u5_0ImmPred:$U5), u32_0ImmPred:$u8), - (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>; - -let AddedComplexity = 200 in { - def : T_Shift_CommOp_pat ; - def : T_Shift_CommOp_pat ; - def : T_Shift_CommOp_pat ; - def : T_Shift_CommOp_pat ; -} - -let AddedComplexity = 30 in { - def : T_Shift_CommOp_pat ; - def : T_Shift_CommOp_pat ; -} - -class T_Shift_Op_pat - : Pat<(Op u32_0ImmPred:$u8, (ShOp IntRegs:$Rx, u5_0ImmPred:$U5)), - (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>; - -def : T_Shift_Op_pat ; -def : T_Shift_Op_pat ; - -let AddedComplexity = 200 in { - def: Pat<(add addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)), - (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; - def: Pat<(add addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)), - (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; - def: 
Pat<(sub addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)), - (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; - def: Pat<(sub addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)), - (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; -} - // Vector conditional negate // Rdd=vcnegh(Rss,Rt) let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in @@ -2837,10 +2272,7 @@ def A4_vrminuw : T_S3op_6 < "vrminuw", 0b110, 1>; // Shift an immediate left by register amount. let hasNewValue = 1, hasSideEffects = 0 in def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6_0Imm:$s6, IntRegs:$Rt), - "$Rd = lsl(#$s6, $Rt)" , - [(set (i32 IntRegs:$Rd), (shl s6_0ImmPred:$s6, - (i32 IntRegs:$Rt)))], - "", S_3op_tc_1_SLOT23> { + "$Rd = lsl(#$s6, $Rt)" , [], "", S_3op_tc_1_SLOT23> { bits<5> Rd; bits<6> s6; bits<5> Rt; @@ -2863,71 +2295,6 @@ def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6_0Imm:$s6, IntRegs:$Rt), // MEMOP //===----------------------------------------------------------------------===// -def m5_0Imm8Pred : PatLeaf<(i32 imm), [{ - int8_t v = (int8_t)N->getSExtValue(); - return v > -32 && v <= -1; -}]>; - -def m5_0Imm16Pred : PatLeaf<(i32 imm), [{ - int16_t v = (int16_t)N->getSExtValue(); - return v > -32 && v <= -1; -}]>; - -def Clr5Imm8Pred : PatLeaf<(i32 imm), [{ - uint32_t v = (uint8_t)~N->getZExtValue(); - return ImmIsSingleBit(v); -}]>; - -def Clr5Imm16Pred : PatLeaf<(i32 imm), [{ - uint32_t v = (uint16_t)~N->getZExtValue(); - return ImmIsSingleBit(v); -}]>; - -def Set5Imm8 : SDNodeXFormgetZExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - -def Set5Imm16 : SDNodeXFormgetZExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - -def Set5Imm32 : SDNodeXFormgetZExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - -def Clr5Imm8 : SDNodeXFormgetZExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - -def Clr5Imm16 : SDNodeXFormgetZExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - -def 
Clr5Imm32 : SDNodeXFormgetZExtValue(); - return XformMskToBitPosU5Imm(imm, SDLoc(N)); -}]>; - -def NegImm8 : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32); -}]>; - -def NegImm16 : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32); -}]>; - -def NegImm32 : SDNodeXFormgetTargetConstant(-N->getSExtValue(), SDLoc(N), MVT::i32); -}]>; - -def IdImm : SDNodeXForm; //===----------------------------------------------------------------------===// // Template class for MemOp instructions with the register value. @@ -3026,235 +2393,6 @@ let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in { } -multiclass Memopxr_simple_pat { - // Addr: i32 - def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs), - (MI I32:$Rs, 0, I32:$A)>; - // Addr: fi - def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs), - (MI AddrFI:$Rs, 0, I32:$A)>; -} - -multiclass Memopxr_add_pat { - // Addr: i32 - def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A), - (add I32:$Rs, ImmPred:$Off)), - (MI I32:$Rs, imm:$Off, I32:$A)>; - def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), I32:$A), - (orisadd I32:$Rs, ImmPred:$Off)), - (MI I32:$Rs, imm:$Off, I32:$A)>; - // Addr: fi - def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A), - (add AddrFI:$Rs, ImmPred:$Off)), - (MI AddrFI:$Rs, imm:$Off, I32:$A)>; - def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), I32:$A), - (orisadd AddrFI:$Rs, ImmPred:$Off)), - (MI AddrFI:$Rs, imm:$Off, I32:$A)>; -} - -multiclass Memopxr_pat { - defm: Memopxr_simple_pat ; - defm: Memopxr_add_pat ; -} - -let AddedComplexity = 180 in { - // add reg - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - - // sub reg - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: 
Memopxr_pat; - - // and reg - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - - // or reg - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; - defm: Memopxr_pat; -} - - -multiclass Memopxi_simple_pat { - // Addr: i32 - def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs), - (MI I32:$Rs, 0, (ArgMod Arg:$A))>; - // Addr: fi - def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs), - (MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>; -} - -multiclass Memopxi_add_pat { - // Addr: i32 - def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$A), - (add I32:$Rs, ImmPred:$Off)), - (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>; - def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), Arg:$A), - (orisadd I32:$Rs, ImmPred:$Off)), - (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>; - // Addr: fi - def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$A), - (add AddrFI:$Rs, ImmPred:$Off)), - (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>; - def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), Arg:$A), - (orisadd AddrFI:$Rs, ImmPred:$Off)), - (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>; -} - -multiclass Memopxi_pat { - defm: Memopxi_simple_pat ; - defm: Memopxi_add_pat ; -} - - -let AddedComplexity = 200 in { - // add imm - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - - // sub imm - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: 
Memopxi_pat; - defm: Memopxi_pat; - - // clrbit imm - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - - // setbit imm - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; - defm: Memopxi_pat; -} - //===----------------------------------------------------------------------===// // XTYPE/PRED + //===----------------------------------------------------------------------===// @@ -3275,54 +2413,6 @@ def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10_0Ext>; def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10_0Ext>; def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9_0Ext>; -def : T_CMP_pat ; -def : T_CMP_pat ; -def : T_CMP_pat ; - -// rs <= rt -> !(rs > rt). -/* -def: Pat<(i1 (setle (i32 IntRegs:$src1), s32_0ImmPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2))>; -// (C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2)>; -*/ -// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1). -def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32_0ImmPred:$src2)), - (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32_0ImmPred:$src2))>; - -// rs != rt -> !(rs == rt). -def: Pat<(i1 (setne (i32 IntRegs:$src1), s32_0ImmPred:$src2)), - (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>; - -// SDNode for converting immediate C to C-1. -def DEC_CONST_BYTE : SDNodeXFormgetSExtValue(); - return XformU7ToU7M1Imm(imm, SDLoc(N)); -}]>; - -// For the sequence -// zext( setult ( and(Rs, 255), u8)) -// Use the isdigit transformation below - -// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)' -// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;. -// The isdigit transformation relies on two 'clever' aspects: -// 1) The data type is unsigned which allows us to eliminate a zero test after -// biasing the expression by 48. We are depending on the representation of -// the unsigned types, and semantics. 
-// 2) The front end has converted <= 9 into < 10 on entry to LLVM -// -// For the C code: -// retval = ((c>='0') & (c<='9')) ? 1 : 0; -// The code is transformed upstream of llvm into -// retval = (c-48) < 10 ? 1 : 0; -let AddedComplexity = 139 in -def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)), - u7_0StrictPosImmPred:$src2)))), - (C2_muxii (A4_cmpbgtui IntRegs:$src1, - (DEC_CONST_BYTE u7_0StrictPosImmPred:$src2)), - 0, 1)>; - //===----------------------------------------------------------------------===// // XTYPE/PRED - //===----------------------------------------------------------------------===// @@ -3717,50 +2807,6 @@ let isNVStorable = 0, accessSize = HalfWordAccess in def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>, PredNewRel; -class Loada_pat - : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; - -class Loadam_pat - : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>; - -class Storea_pat - : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; - -class Stoream_pat - : Pat<(Store Value:$val, Addr:$addr), - (MI Addr:$addr, (ValueMod Value:$val))>; - -let AddedComplexity = 30 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - - def: Stoream_pat; - def: Stoream_pat; - def: Stoream_pat; -} - -def: Storea_pat, I32, addrgp, S2_storerbgp>; -def: Storea_pat, I32, addrgp, S2_storerhgp>; -def: Storea_pat, I32, addrgp, S2_storerigp>; -def: Storea_pat, I64, addrgp, S2_storerdgp>; - -let AddedComplexity = 100 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - - // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" - // to "r0 = 1; memw(#foo) = r0" - let AddedComplexity = 100 in - def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), - (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>; -} - //===----------------------------------------------------------------------===// // Template class for non predicated load instructions 
with // absolute addressing mode. @@ -3880,26 +2926,6 @@ defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; let accessSize = DoubleWordAccess in defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; -class LoadAbs_pats - : Pat <(VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))), - (VT (MI tglobaladdr:$absaddr))>; - -let AddedComplexity = 30 in { - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; -} - -let AddedComplexity = 30 in -def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))), - (Zext64 (PS_loadrubabs tglobaladdr:$absaddr))>; - //===----------------------------------------------------------------------===// // multiclass for load instructions with GP-relative addressing mode. // Rx=mem[bhwd](##global) @@ -3930,148 +2956,10 @@ def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; let accessSize = DoubleWordAccess in def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; -def: Loada_pat; -def: Loada_pat; -def: Loada_pat; -def: Loada_pat; - -// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd -def: Loadam_pat; -def: Loadam_pat; - -def: Stoream_pat; -def: Stoream_pat; - -// Map from load(globaladdress) -> mem[u][bhwd](#foo) -class LoadGP_pats - : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))), - (VT (MI tglobaladdr:$global))>; - -let AddedComplexity = 100 in { - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; -} - -// When the Interprocedural Global Variable optimizer realizes that a certain -// global variable takes only two constant values, it shrinks the global to -// a boolean. Catch those loads here in the following 3 patterns. 
-let AddedComplexity = 100 in { - def: LoadGP_pats ; - def: LoadGP_pats ; -} - -// Transfer global address into a register -def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi s16_0Ext:$Rs)>; -def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi s16_0Ext:$Rs)>; -def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi s16_0Ext:$Rs)>; - -let AddedComplexity = 30 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; -} - -let AddedComplexity = 30 in { - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; -} - -// Indexed store word - global address. -// memw(Rs+#u6:2)=#S8 -let AddedComplexity = 100 in -defm: Storex_add_pat; - -// Load from a global address that has only one use in the current basic block. -let AddedComplexity = 100 in { - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loada_pat; - def: Loada_pat; -} - -// Store to a global address that has only one use in the current basic block. -let AddedComplexity = 100 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - - def: Stoream_pat; -} - -// i8/i16/i32 -> i64 loads -// We need a complexity of 120 here to override preceding handling of -// zextload. 
-let AddedComplexity = 120 in { - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; - - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; - - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; -} - -let AddedComplexity = 100 in { - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loada_pat; - def: Loada_pat; -} - -let AddedComplexity = 100 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; -} - -def: Loada_pat; -def: Loada_pat; -def: Loada_pat; -def: Loada_pat; - -def: Storea_pat, I32, addrgp, PS_storerbabs>; -def: Storea_pat, I32, addrgp, PS_storerhabs>; -def: Storea_pat, I32, addrgp, PS_storeriabs>; -def: Storea_pat, I64, addrgp, PS_storerdabs>; - let Constraints = "@earlyclobber $dst" in def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b, IntRegs:$c, IntRegs:$d), - ".error \"Should never try to emit Insert4\"", - [(set (i64 DoubleRegs:$dst), - (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))), - (i32 16)), - (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))), - (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))), - (i32 32))), - (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>; + ".error \"Should never try to emit Insert4\"", []>; //===----------------------------------------------------------------------===// // :raw for of boundscheck:hi:lo insns @@ -4141,20 +3029,12 @@ def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd), let Inst{1-0} = Pd; } -// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH -// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore. -// We don't really want either one here. -def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>; -def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, - [SDNPHasChain]>; - // Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't // really do a load. 
let hasSideEffects = 1, mayLoad = 0 in def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3), "dcfetch($Rs + #$u11_3)", - [(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3)], - "", LD_tc_ld_SLOT0> { + [], "", LD_tc_ld_SLOT0> { bits<5> Rs; bits<14> u11_3; @@ -4166,9 +3046,6 @@ def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3), } -def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)), - (Y2_dcfetchbo IntRegs:$Rs, u11_3ImmPred:$u11_3)>; - //===----------------------------------------------------------------------===// // Compound instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td b/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td index 5a6ef19..cd19b69 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td @@ -11,15 +11,6 @@ // //===----------------------------------------------------------------------===// -def f32ImmPred : PatLeaf<(f32 fpimm:$F)>; -def f64ImmPred : PatLeaf<(f64 fpimm:$F)>; - -def ftoi : SDNodeXFormgetValueAPF().bitcastToAPInt(); - return CurDAG->getTargetConstant(I.getZExtValue(), SDLoc(N), - MVT::getIntegerVT(I.getBitWidth())); -}]>; - //===----------------------------------------------------------------------===// // XTYPE/MPY //===----------------------------------------------------------------------===// @@ -52,10 +43,7 @@ let Predicates = [HasV5T] in { def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>; } -def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6_0Imm, - [(set I64:$dst, - (sra (i64 (add (i64 (sra I64:$src1, u6_0ImmPred:$src2)), 1)), - (i32 1)))], 1>, +def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6_0Imm, [], 1>, Requires<[HasV5T]> { bits<6> src2; let Inst{13-8} = src2; @@ -76,15 +64,9 @@ def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>, let Inst{20,13,7,4} = 0b1111; } 
-def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, - SDTCisVT<1, i64>]>; - -def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>; - let hasNewValue = 1, validSubTargets = HasV5SubT in def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), - "$Rd = popcount($Rss)", - [(set I32:$Rd, (HexagonPOPCOUNT I64:$Rss))], "", S_2op_tc_2_SLOT23>, + "$Rd = popcount($Rss)", [], "", S_2op_tc_2_SLOT23>, Requires<[HasV5T]> { bits<5> Rd; bits<5> Rss; @@ -97,76 +79,6 @@ def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), let Inst{20-16} = Rss; } -let AddedComplexity = 20 in { - defm: Loadx_pat; - defm: Loadx_pat; -} - -let AddedComplexity = 60 in { - defm : T_LoadAbsReg_Pat ; - defm : T_LoadAbsReg_Pat ; -} - -let AddedComplexity = 40 in { - def: Loadxs_pat; - def: Loadxs_pat; -} - -let AddedComplexity = 20 in { - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; -} - -let AddedComplexity = 80 in { - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; -} - -let AddedComplexity = 100 in { - def: LoadGP_pats ; - def: LoadGP_pats ; -} - -let AddedComplexity = 20 in { - defm: Storex_pat; - defm: Storex_pat; -} - -// Simple patterns should be tried with the least priority. 
-def: Storex_simple_pat; -def: Storex_simple_pat; - -let AddedComplexity = 60 in { - defm : T_StoreAbsReg_Pats ; - defm : T_StoreAbsReg_Pats ; -} - -let AddedComplexity = 40 in { - def: Storexs_pat; - def: Storexs_pat; -} - -let AddedComplexity = 20 in { - def: Store_rr_pat; - def: Store_rr_pat; -} - -let AddedComplexity = 80 in { - def: Storea_pat; - def: Storea_pat; -} - -let AddedComplexity = 100 in { - def: Storea_pat; - def: Storea_pat; -} - -defm: Storex_pat; -defm: Storex_pat; -def: Storex_simple_pat; -def: Storex_simple_pat; - let isFP = 1, hasNewValue = 1, opNewValue = 0 in class T_MInstFloat MajOp, bits<3> MinOp> : MInst<(outs IntRegs:$Rd), @@ -196,43 +108,11 @@ let isCommutable = 1 in { def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>; -def: Pat<(fadd F32:$src1, F32:$src2), - (F2_sfadd F32:$src1, F32:$src2)>; - -def: Pat<(fsub F32:$src1, F32:$src2), - (F2_sfsub F32:$src1, F32:$src2)>; - -def: Pat<(fmul F32:$src1, F32:$src2), - (F2_sfmpy F32:$src1, F32:$src2)>; - let Itinerary = M_tc_3x_SLOT23 in { def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>; def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>; } -let Predicates = [HasV5T] in { - def: Pat<(f32 (fminnum F32:$Rs, F32:$Rt)), (F2_sfmin F32:$Rs, F32:$Rt)>; - def: Pat<(f32 (fmaxnum F32:$Rs, F32:$Rt)), (F2_sfmax F32:$Rs, F32:$Rt)>; -} - -let AddedComplexity = 100, Predicates = [HasV5T] in { - class SfSel12 - : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rs, F32:$Rt), - (MI F32:$Rs, F32:$Rt)>; - class SfSel21 - : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rt, F32:$Rs), - (MI F32:$Rs, F32:$Rt)>; - - def: SfSel12; - def: SfSel12; - def: SfSel12; - def: SfSel12; - def: SfSel21; - def: SfSel21; - def: SfSel21; - def: SfSel21; -} - let Itinerary = M_tc_3or4x_SLOT23 in { def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>; def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>; @@ -283,15 +163,13 @@ class T_fcmp MinOp, } class T_fcmp64 MinOp> - : T_fcmp { + : T_fcmp { let IClass = 0b1101; 
let Inst{27-21} = 0b0010111; } class T_fcmp32 MinOp> - : T_fcmp { + : T_fcmp { let IClass = 0b1100; let Inst{27-21} = 0b0111111; } @@ -306,260 +184,12 @@ def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>; def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>; def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>; -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for ordered gt, ge, eq operations. -//===----------------------------------------------------------------------===// - -let Predicates = [HasV5T] in -multiclass T_fcmp_pats { - // IntRegs - def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), - (IntMI F32:$src1, F32:$src2)>; - // DoubleRegs - def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), - (DoubleMI F64:$src1, F64:$src2)>; -} - -defm : T_fcmp_pats ; -defm : T_fcmp_pats ; -defm : T_fcmp_pats ; - -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations. 
-//===----------------------------------------------------------------------===// -let Predicates = [HasV5T] in -multiclass unord_Pats { - // IntRegs - def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (IntMI F32:$src1, F32:$src2))>; - - // DoubleRegs - def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (DoubleMI F64:$src1, F64:$src2))>; -} - -defm : unord_Pats ; -defm : unord_Pats ; -defm : unord_Pats ; - -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for the following dags: -// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2)) -// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2) -// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2) -// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2)) -//===----------------------------------------------------------------------===// -let Predicates = [HasV5T] in -multiclass eq_ordgePats { - // IntRegs - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (C2_not (IntMI F32:$src1, F32:$src2))>; - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (IntMI F32:$src1, F32:$src2)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (IntMI F32:$src1, F32:$src2)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (C2_not (IntMI F32:$src1, F32:$src2))>; - - // DoubleRegs - def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (C2_not (DoubleMI F64:$src1, F64:$src2))>; - def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), - (DoubleMI F64:$src1, F64:$src2)>; - def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (DoubleMI F64:$src1, F64:$src2)>; - def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)), - (C2_not (DoubleMI F64:$src1, F64:$src2))>; -} - -defm : eq_ordgePats; -defm : eq_ordgePats; -defm : eq_ordgePats; - -//===----------------------------------------------------------------------===// -// Multiclass to 
define 'Def Pats' for the following dags: -// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1)) -// seteq(setolt(op1, op2), 1) -> setogt(op2, op1) -// setne(setolt(op1, op2), 0) -> setogt(op2, op1) -// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1)) -//===----------------------------------------------------------------------===// -let Predicates = [HasV5T] in -multiclass eq_ordltPats { - // IntRegs - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (C2_not (IntMI F32:$src2, F32:$src1))>; - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (IntMI F32:$src2, F32:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (IntMI F32:$src2, F32:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (C2_not (IntMI F32:$src2, F32:$src1))>; - - // DoubleRegs - def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (C2_not (DoubleMI F64:$src2, F64:$src1))>; - def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), - (DoubleMI F64:$src2, F64:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (DoubleMI F64:$src2, F64:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (C2_not (DoubleMI F64:$src2, F64:$src1))>; -} - -defm : eq_ordltPats; -defm : eq_ordltPats; - - -// o. seto inverse of setuo. http://llvm.org/docs/LangRef.html#i_fcmp -let Predicates = [HasV5T] in { - def: Pat<(i1 (seto F32:$src1, F32:$src2)), - (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>; - def: Pat<(i1 (seto F32:$src1, f32ImmPred:$src2)), - (C2_not (F2_sfcmpuo (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>; - def: Pat<(i1 (seto F64:$src1, F64:$src2)), - (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>; - def: Pat<(i1 (seto F64:$src1, f64ImmPred:$src2)), - (C2_not (F2_dfcmpuo (CONST64 (ftoi $src2)), F64:$src1))>; -} - -// Ordered lt. 
-let Predicates = [HasV5T] in { - def: Pat<(i1 (setolt F32:$src1, F32:$src2)), - (F2_sfcmpgt F32:$src2, F32:$src1)>; - def: Pat<(i1 (setolt F32:$src1, f32ImmPred:$src2)), - (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; - def: Pat<(i1 (setolt F64:$src1, F64:$src2)), - (F2_dfcmpgt F64:$src2, F64:$src1)>; - def: Pat<(i1 (setolt F64:$src1, f64ImmPred:$src2)), - (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>; -} - -// Unordered lt. -let Predicates = [HasV5T] in { - def: Pat<(i1 (setult F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (F2_sfcmpgt F32:$src2, F32:$src1))>; - def: Pat<(i1 (setult F32:$src1, f32ImmPred:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))), - (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>; - def: Pat<(i1 (setult F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (F2_dfcmpgt F64:$src2, F64:$src1))>; - def: Pat<(i1 (setult F64:$src1, f64ImmPred:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))), - (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1))>; -} - -// Ordered le. -let Predicates = [HasV5T] in { - // rs <= rt -> rt >= rs. - def: Pat<(i1 (setole F32:$src1, F32:$src2)), - (F2_sfcmpge F32:$src2, F32:$src1)>; - def: Pat<(i1 (setole F32:$src1, f32ImmPred:$src2)), - (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; - - // Rss <= Rtt -> Rtt >= Rss. - def: Pat<(i1 (setole F64:$src1, F64:$src2)), - (F2_dfcmpge F64:$src2, F64:$src1)>; - def: Pat<(i1 (setole F64:$src1, f64ImmPred:$src2)), - (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>; -} - -// Unordered le. -let Predicates = [HasV5T] in { -// rs <= rt -> rt >= rs. 
- def: Pat<(i1 (setule F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (F2_sfcmpge F32:$src2, F32:$src1))>; - def: Pat<(i1 (setule F32:$src1, f32ImmPred:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))), - (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>; - def: Pat<(i1 (setule F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (F2_dfcmpge F64:$src2, F64:$src1))>; - def: Pat<(i1 (setule F64:$src1, f64ImmPred:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))), - (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1))>; -} - -// Ordered ne. -let Predicates = [HasV5T] in { - def: Pat<(i1 (setone F32:$src1, F32:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; - def: Pat<(i1 (setone F64:$src1, F64:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; - def: Pat<(i1 (setone F32:$src1, f32ImmPred:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>; - def: Pat<(i1 (setone F64:$src1, f64ImmPred:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>; -} - -// Unordered ne. -let Predicates = [HasV5T] in { - def: Pat<(i1 (setune F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>; - def: Pat<(i1 (setune F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>; - def: Pat<(i1 (setune F32:$src1, f32ImmPred:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))), - (C2_not (F2_sfcmpeq F32:$src1, - (f32 (A2_tfrsi (ftoi $src2))))))>; - def: Pat<(i1 (setune F64:$src1, f64ImmPred:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))), - (C2_not (F2_dfcmpeq F64:$src1, - (CONST64 (ftoi $src2)))))>; -} - -// Besides set[o|u][comparions], we also need set[comparisons]. -let Predicates = [HasV5T] in { - // lt. 
- def: Pat<(i1 (setlt F32:$src1, F32:$src2)), - (F2_sfcmpgt F32:$src2, F32:$src1)>; - def: Pat<(i1 (setlt F32:$src1, f32ImmPred:$src2)), - (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; - def: Pat<(i1 (setlt F64:$src1, F64:$src2)), - (F2_dfcmpgt F64:$src2, F64:$src1)>; - def: Pat<(i1 (setlt F64:$src1, f64ImmPred:$src2)), - (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>; - - // le. - // rs <= rt -> rt >= rs. - def: Pat<(i1 (setle F32:$src1, F32:$src2)), - (F2_sfcmpge F32:$src2, F32:$src1)>; - def: Pat<(i1 (setle F32:$src1, f32ImmPred:$src2)), - (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; - - // Rss <= Rtt -> Rtt >= Rss. - def: Pat<(i1 (setle F64:$src1, F64:$src2)), - (F2_dfcmpge F64:$src2, F64:$src1)>; - def: Pat<(i1 (setle F64:$src1, f64ImmPred:$src2)), - (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>; - - // ne. - def: Pat<(i1 (setne F32:$src1, F32:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; - def: Pat<(i1 (setne F64:$src1, F64:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; - def: Pat<(i1 (setne F32:$src1, f32ImmPred:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>; - def: Pat<(i1 (setne F64:$src1, f64ImmPred:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>; -} - // F2 convert template classes: let Uses = [USR], isFP = 1 in class F2_RDD_RSS_CONVERT MinOp, - SDNode Op, PatLeaf RCOut, PatLeaf RCIn, string chop =""> : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss), - "$Rdd = "#mnemonic#"($Rss)"#chop, - [(set RCOut:$Rdd, (Op RCIn:$Rss))], "", + "$Rdd = "#mnemonic#"($Rss)"#chop, [], "", S_2op_tc_3or4x_SLOT23> { bits<5> Rdd; bits<5> Rss; @@ -574,11 +204,9 @@ class F2_RDD_RSS_CONVERT MinOp, let Uses = [USR], isFP = 1 in class F2_RDD_RS_CONVERT MinOp, - SDNode Op, PatLeaf RCOut, PatLeaf RCIn, string chop =""> : SInst <(outs DoubleRegs:$Rdd), (ins IntRegs:$Rs), - "$Rdd = "#mnemonic#"($Rs)"#chop, - [(set RCOut:$Rdd, (Op RCIn:$Rs))], "", + "$Rdd = "#mnemonic#"($Rs)"#chop, [], "", 
S_2op_tc_3or4x_SLOT23> { bits<5> Rdd; bits<5> Rs; @@ -593,11 +221,9 @@ class F2_RDD_RS_CONVERT MinOp, let Uses = [USR], isFP = 1, hasNewValue = 1 in class F2_RD_RSS_CONVERT MinOp, - SDNode Op, PatLeaf RCOut, PatLeaf RCIn, string chop =""> : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss), - "$Rd = "#mnemonic#"($Rss)"#chop, - [(set RCOut:$Rd, (Op RCIn:$Rss))], "", + "$Rd = "#mnemonic#"($Rss)"#chop, [], "", S_2op_tc_3or4x_SLOT23> { bits<5> Rd; bits<5> Rss; @@ -613,11 +239,9 @@ class F2_RD_RSS_CONVERT MinOp, let Uses = [USR], isFP = 1, hasNewValue = 1 in class F2_RD_RS_CONVERT MajOp, bits<3> MinOp, - SDNode Op, PatLeaf RCOut, PatLeaf RCIn, string chop =""> : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs), - "$Rd = "#mnemonic#"($Rs)"#chop, - [(set RCOut:$Rd, (Op RCIn:$Rs))], "", + "$Rd = "#mnemonic#"($Rs)"#chop, [], "", S_2op_tc_3or4x_SLOT23> { bits<5> Rd; bits<5> Rs; @@ -632,66 +256,41 @@ class F2_RD_RS_CONVERT MajOp, bits<3> MinOp, } // Convert single precision to double precision and vice-versa. -def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000, - fpextend, F64, F32>; - -def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000, - fpround, F32, F64>; +def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000>; +def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000>; // Convert Integer to Floating Point. 
-def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010, - sint_to_fp, F32, I64>; -def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001, - uint_to_fp, F32, I64>; -def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000, - uint_to_fp, F32, I32>; -def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000, - sint_to_fp, F32, I32>; -def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011, - sint_to_fp, F64, I64>; -def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010, - uint_to_fp, F64, I64>; -def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001, - uint_to_fp, F64, I32>; -def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010, - sint_to_fp, F64, I32>; - -// Convert Floating Point to Integer - default. -def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101, - fp_to_uint, I32, F64, ":chop">; -def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111, - fp_to_sint, I32, F64, ":chop">; +def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010>; +def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001>; +def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000>; +def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000>; +def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011>; +def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010>; +def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001>; +def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010>; + +// Convert Floating Point to Integer. 
+def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101, ":chop">; +def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111, ":chop">; def F2_conv_sf2uw_chop : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b001, - fp_to_uint, I32, F32, ":chop">; + ":chop">; def F2_conv_sf2w_chop : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b001, - fp_to_sint, I32, F32, ":chop">; -def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110, - fp_to_sint, I64, F64, ":chop">; -def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111, - fp_to_uint, I64, F64, ":chop">; -def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110, - fp_to_sint, I64, F32, ":chop">; -def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101, - fp_to_uint, I64, F32, ":chop">; + ":chop">; +def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110, ":chop">; +def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111, ":chop">; +def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110, ":chop">; +def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101, ":chop">; // Convert Floating Point to Integer: non-chopped. 
-let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in { - def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000, - fp_to_sint, I64, F64>; - def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001, - fp_to_uint, I64, F64>; - def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011, - fp_to_uint, I64, F32>; - def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100, - fp_to_sint, I64, F32>; - def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011, - fp_to_uint, I32, F64>; - def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100, - fp_to_sint, I32, F64>; - def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000, - fp_to_uint, I32, F32>; - def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000, - fp_to_sint, I32, F32>; +let AddedComplexity = 20, Predicates = [HasV5T] in { + def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000>; + def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001>; + def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011>; + def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100>; + def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011>; + def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100>; + def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000>; + def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000>; } // Fix up radicand. @@ -710,14 +309,6 @@ def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs), let Inst{4-0} = Rd; } -// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. -let Predicates = [HasV5T] in { - def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>; - def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>; - def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>; - def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>; -} - // F2_sffma: Floating-point fused multiply add. 
let Uses = [USR], isFP = 1, hasNewValue = 1 in class T_sfmpy_acc @@ -747,15 +338,6 @@ def F2_sffms: T_sfmpy_acc <1, 0>; def F2_sffma_lib: T_sfmpy_acc <0, 1>; def F2_sffms_lib: T_sfmpy_acc <1, 1>; -def : Pat <(fma F32:$src2, F32:$src3, F32:$src1), - (F2_sffma F32:$src1, F32:$src2, F32:$src3)>; - -def : Pat <(fma (fneg F32:$src2), F32:$src3, F32:$src1), - (F2_sffms F32:$src1, F32:$src2, F32:$src3)>; - -def : Pat <(fma F32:$src2, (fneg F32:$src3), F32:$src1), - (F2_sffms F32:$src1, F32:$src2, F32:$src3)>; - // Floating-point fused multiply add w/ additional scaling (2**pu). let Uses = [USR], isFP = 1, hasNewValue = 1 in def F2_sffma_sc: MInst < @@ -780,46 +362,6 @@ def F2_sffma_sc: MInst < let Inst{4-0} = Rx; } -def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$imm), - (C2_muxir I1:$Pu, F32:$Rs, (ftoi $imm))>, - Requires<[HasV5T]>; - -def: Pat<(select I1:$Pu, f32ImmPred:$imm, F32:$Rt), - (C2_muxri I1:$Pu, (ftoi $imm), F32:$Rt)>, - Requires<[HasV5T]>; - -def: Pat<(select I1:$src1, F32:$src2, F32:$src3), - (C2_mux I1:$src1, F32:$src2, F32:$src3)>, - Requires<[HasV5T]>; - -def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4), - (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>, - Requires<[HasV5T]>; - -def: Pat<(select I1:$src1, F64:$src2, F64:$src3), - (C2_vmux I1:$src1, F64:$src2, F64:$src3)>, - Requires<[HasV5T]>; - -def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4), - (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>, - Requires<[HasV5T]>; - -// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) -// => r0 = mux(p0, #i, r1) -def: Pat<(select (not I1:$src1), f32ImmPred:$src2, F32:$src3), - (C2_muxir I1:$src1, F32:$src3, (ftoi $src2))>, - Requires<[HasV5T]>; - -// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) -// => r0 = mux(p0, r1, #i) -def: Pat<(select (not I1:$src1), F32:$src2, f32ImmPred:$src3), - (C2_muxri I1:$src1, (ftoi $src3), F32:$src2)>, - Requires<[HasV5T]>; - -def: Pat<(i32 (fp_to_sint 
F64:$src1)), - (LoReg (F2_conv_df2d_chop F64:$src1))>, - Requires<[HasV5T]>; - //===----------------------------------------------------------------------===// // :natural forms of vasrh and vasrhub insns //===----------------------------------------------------------------------===// @@ -910,7 +452,7 @@ let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in { // Classify floating-point value let Uses = [USR], isFP = 1 in -def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>; +def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>, Requires<[HasV5T]>; let Uses = [USR], isFP = 1 in def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5_0Imm:$u5), @@ -953,11 +495,3 @@ let hasNewValue = 1, opNewValue = 0 in { def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>; def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>; - -def : Pat <(fabs (f32 IntRegs:$src1)), - (S2_clrbit_i (f32 IntRegs:$src1), 31)>, - Requires<[HasV5T]>; - -def : Pat <(fneg (f32 IntRegs:$src1)), - (S2_togglebit_i (f32 IntRegs:$src1), 31)>, - Requires<[HasV5T]>; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td b/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td index 9064804..c50141b 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td @@ -10,23 +10,6 @@ // This file describes the Hexagon V60 instructions in TableGen format. 
// //===----------------------------------------------------------------------===// -def alignedload : PatFrag<(ops node:$addr), (load $addr), [{ - return isAlignedMemNode(dyn_cast(N)); -}]>; - -def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{ - return !isAlignedMemNode(dyn_cast(N)); -}]>; - -def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ - return isAlignedMemNode(dyn_cast(N)); -}]>; - -def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ - return !isAlignedMemNode(dyn_cast(N)); -}]>; - - // Vector load let Predicates = [HasV60T, UseHVX] in let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in @@ -767,96 +750,6 @@ def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>, V6_vS32b_nt_new_npred_ppu_enc; } -multiclass vS32b_ai_pats { - // Aligned stores - def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), - (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, - Requires<[UseHVXSgl]>; - def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), - (V6_vS32Ub_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, - Requires<[UseHVXSgl]>; - - // 128B Aligned stores - def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), - (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, - Requires<[UseHVXDbl]>; - def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), - (V6_vS32Ub_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, - Requires<[UseHVXDbl]>; - - // Fold Add R+OFF into vector store. 
- let AddedComplexity = 10 in { - def : Pat<(alignedstore (VTSgl VectorRegs:$src1), - (add IntRegs:$src2, s4_6ImmPred:$offset)), - (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset, - (VTSgl VectorRegs:$src1))>, - Requires<[UseHVXSgl]>; - def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), - (add IntRegs:$src2, s4_6ImmPred:$offset)), - (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset, - (VTSgl VectorRegs:$src1))>, - Requires<[UseHVXSgl]>; - - // Fold Add R+OFF into vector store 128B. - def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), - (add IntRegs:$src2, s4_7ImmPred:$offset)), - (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, - (VTDbl VectorRegs128B:$src1))>, - Requires<[UseHVXDbl]>; - def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), - (add IntRegs:$src2, s4_7ImmPred:$offset)), - (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, - (VTDbl VectorRegs128B:$src1))>, - Requires<[UseHVXDbl]>; - } -} - -defm : vS32b_ai_pats ; -defm : vS32b_ai_pats ; -defm : vS32b_ai_pats ; -defm : vS32b_ai_pats ; - - -multiclass vL32b_ai_pats { - // Aligned loads - def : Pat < (VTSgl (alignedload IntRegs:$addr)), - (V6_vL32b_ai IntRegs:$addr, 0) >, - Requires<[UseHVXSgl]>; - def : Pat < (VTSgl (unalignedload IntRegs:$addr)), - (V6_vL32Ub_ai IntRegs:$addr, 0) >, - Requires<[UseHVXSgl]>; - - // 128B Load - def : Pat < (VTDbl (alignedload IntRegs:$addr)), - (V6_vL32b_ai_128B IntRegs:$addr, 0) >, - Requires<[UseHVXDbl]>; - def : Pat < (VTDbl (unalignedload IntRegs:$addr)), - (V6_vL32Ub_ai_128B IntRegs:$addr, 0) >, - Requires<[UseHVXDbl]>; - - // Fold Add R+OFF into vector load. 
- let AddedComplexity = 10 in { - def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))), - (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, - Requires<[UseHVXDbl]>; - def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))), - (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, - Requires<[UseHVXDbl]>; - - def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))), - (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>, - Requires<[UseHVXSgl]>; - def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))), - (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>, - Requires<[UseHVXSgl]>; - } -} - -defm : vL32b_ai_pats ; -defm : vL32b_ai_pats ; -defm : vL32b_ai_pats ; -defm : vL32b_ai_pats ; - // Vector load/store pseudos @@ -873,29 +766,6 @@ def PS_vstorerw_ai_128B: STrivv_template, def PS_vstorerwu_ai_128B: STrivv_template, Requires<[HasV60T,UseHVXDbl]>; -multiclass STrivv_pats { - def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), - (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, - Requires<[UseHVXSgl]>; - def : Pat<(unalignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), - (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, - Requires<[UseHVXSgl]>; - - def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), - (PS_vstorerw_ai_128B IntRegs:$addr, 0, - (VTDbl VecDblRegs128B:$src1))>, - Requires<[UseHVXDbl]>; - def : Pat<(unalignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), - (PS_vstorerwu_ai_128B IntRegs:$addr, 0, - (VTDbl VecDblRegs128B:$src1))>, - Requires<[UseHVXDbl]>; -} - -defm : STrivv_pats ; -defm : STrivv_pats ; -defm : STrivv_pats ; -defm : STrivv_pats ; - let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in class LDrivv_template @@ -910,27 +780,6 @@ def PS_vloadrw_ai_128B: LDrivv_template, def PS_vloadrwu_ai_128B: LDrivv_template, Requires<[HasV60T,UseHVXDbl]>; -multiclass LDrivv_pats { - def : Pat<(VTSgl 
(alignedload I32:$addr)), - (PS_vloadrw_ai I32:$addr, 0)>, - Requires<[UseHVXSgl]>; - def : Pat<(VTSgl (unalignedload I32:$addr)), - (PS_vloadrwu_ai I32:$addr, 0)>, - Requires<[UseHVXSgl]>; - - def : Pat<(VTDbl (alignedload I32:$addr)), - (PS_vloadrw_ai_128B I32:$addr, 0)>, - Requires<[UseHVXDbl]>; - def : Pat<(VTDbl (unalignedload I32:$addr)), - (PS_vloadrwu_ai_128B I32:$addr, 0)>, - Requires<[UseHVXDbl]>; -} - -defm : LDrivv_pats ; -defm : LDrivv_pats ; -defm : LDrivv_pats ; -defm : LDrivv_pats ; - // Store vector predicate pseudo. let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { @@ -977,20 +826,6 @@ let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { "", []>, Requires<[HasV60T,UseHVXDbl]>; } -let Predicates = [HasV60T,UseHVXSgl] in { - def: Pat<(select I1:$Pu, (v16i32 VectorRegs:$Vs), VectorRegs:$Vt), - (PS_vselect I1:$Pu, VectorRegs:$Vs, VectorRegs:$Vt)>; - def: Pat<(select I1:$Pu, (v32i32 VecDblRegs:$Vs), VecDblRegs:$Vt), - (PS_wselect I1:$Pu, VecDblRegs:$Vs, VecDblRegs:$Vt)>; -} -let Predicates = [HasV60T,UseHVXDbl] in { - def: Pat<(select I1:$Pu, (v32i32 VectorRegs128B:$Vs), VectorRegs128B:$Vt), - (PS_vselect_128B I1:$Pu, VectorRegs128B:$Vs, VectorRegs128B:$Vt)>; - def: Pat<(select I1:$Pu, (v64i32 VecDblRegs128B:$Vs), VecDblRegs128B:$Vt), - (PS_wselect_128B I1:$Pu, VecDblRegs128B:$Vs, VecDblRegs128B:$Vt)>; -} - - let hasNewValue = 1 in class T_vmpy : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2), @@ -1519,20 +1354,6 @@ let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in defm V6_vcombine : T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc; -def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, - SDTCisSubVecOfVec<1, 0>]>; - -def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; - -def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs), - (v16i32 VectorRegs:$Vt))), 
- (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), - (v32i32 VecDblRegs:$Vt))), - (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; - let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in { defm V6_vsathub : T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc; @@ -1593,46 +1414,6 @@ defm V6_vpackoh : T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc; } -def SDTHexagonVPACK: SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>, - SDTCisInt<3>]>; - -def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>; - -// 0 as the last argument denotes vpacke. 1 denotes vpacko -def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), - (v64i8 VectorRegs:$Vt), (i32 0))), - (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), - (v64i8 VectorRegs:$Vt), (i32 1))), - (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), - (v32i16 VectorRegs:$Vt), (i32 0))), - (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), - (v32i16 VectorRegs:$Vt), (i32 1))), - (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; - -def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), - (v128i8 VecDblRegs:$Vt), (i32 0))), - (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; -def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), - (v128i8 VecDblRegs:$Vt), (i32 1))), - (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; -def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), - (v64i16 VecDblRegs:$Vt), (i32 0))), - (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; -def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), - (v64i16 VecDblRegs:$Vt), (i32 1))), - 
(V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; - let hasNewValue = 1, hasSideEffects = 0 in class T_HVX_condALU : CVI_VA_Resource1 <(outs RC2:$dst), diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td b/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td index e0b08a02..e3520bd 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td @@ -11,37 +11,6 @@ // //===----------------------------------------------------------------------===// -def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; -def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; -def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; -def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; -def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; -def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; -def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; -def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; - - -multiclass bitconvert_32 { - def : Pat <(b (bitconvert (a IntRegs:$src))), - (b IntRegs:$src)>; - def : Pat <(a (bitconvert (b IntRegs:$src))), - (a IntRegs:$src)>; -} - -multiclass bitconvert_64 { - def : Pat <(b (bitconvert (a DoubleRegs:$src))), - (b DoubleRegs:$src)>; - def : Pat <(a (bitconvert (b DoubleRegs:$src))), - (a DoubleRegs:$src)>; -} - -// Bit convert vector types to integers. -defm : bitconvert_32; -defm : bitconvert_32; -defm : bitconvert_64; -defm : bitconvert_64; -defm : bitconvert_64; - // Vector shift support. Vector shifting in Hexagon is rather different // from internal representation of LLVM. // LLVM assumes all shifts (in vector case) will have the form @@ -51,27 +20,17 @@ defm : bitconvert_64; // As a result, special care is needed to guarantee correctness and // performance. 
class vshift_v4i16MajOp, bits<3>MinOp> - : S_2OpInstImm { + : S_2OpInstImm { bits<4> src2; let Inst{11-8} = src2; } class vshift_v2i32MajOp, bits<3>MinOp> - : S_2OpInstImm { + : S_2OpInstImm { bits<5> src2; let Inst{12-8} = src2; } -def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), - (A2_svaddh IntRegs:$src1, IntRegs:$src2)>; - -def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), - (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; - def S2_asr_i_vw : vshift_v2i32; def S2_lsr_i_vw : vshift_v2i32; def S2_asl_i_vw : vshift_v2i32; @@ -80,87 +39,6 @@ def S2_asr_i_vh : vshift_v4i16; def S2_lsr_i_vh : vshift_v4i16; def S2_asl_i_vh : vshift_v4i16; - -def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>; -def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>; - -// Replicate the low 8-bits from 32-bits input register into each of the -// four bytes of 32-bits destination register. -def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>; - -// Replicate the low 16-bits from 32-bits input register into each of the -// four halfwords of 64-bits destination register. 
-def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>; - - -class VArith_pat - : Pat <(Op Type:$Rss, Type:$Rtt), - (MI Type:$Rss, Type:$Rtt)>; - -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; - -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; - -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; - -def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), - (i32 u5_0ImmPred:$c))))), - (S2_asr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), - (i32 u5_0ImmPred:$c))))), - (S2_lsr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), - (i32 u5_0ImmPred:$c))))), - (S2_asl_i_vw V2I32:$b, imm:$c)>; - -def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), - (S2_asr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), - (S2_lsr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), - (S2_asl_i_vh V4I16:$b, imm:$c)>; - - -def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>; -def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>; - -def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>; -def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>; -def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>; - -def: Pat<(v2i32 (HexagonVSRAW 
V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; - // Vector shift words by register def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>; def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>; @@ -173,65 +51,6 @@ def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>; def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>; def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>; -class vshift_rr_pat - : Pat <(Op Value:$Rs, I32:$Rt), - (MI Value:$Rs, I32:$Rt)>; - -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; - - -def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>; -def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>; -def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>; - -def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPWEQ: 
SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>; -def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>; -def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>; - - -class vcmp_i1_pat - : Pat <(i1 (Op Value:$Rs, Value:$Rt)), - (MI Value:$Rs, Value:$Rt)>; - -def: vcmp_i1_pat; -def: vcmp_i1_pat; -def: vcmp_i1_pat; - -def: vcmp_i1_pat; -def: vcmp_i1_pat; -def: vcmp_i1_pat; - -def: vcmp_i1_pat; -def: vcmp_i1_pat; -def: vcmp_i1_pat; - - -class vcmp_vi1_pat - : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)), - (MI InVal:$Rs, InVal:$Rt)>; - -def: vcmp_vi1_pat; -def: vcmp_vi1_pat; -def: vcmp_vi1_pat; - -def: vcmp_vi1_pat; -def: vcmp_vi1_pat; -def: vcmp_vi1_pat; - // Hexagon doesn't have a vector multiply with C semantics. // Instead, generate a pseudo instruction that gets expaneded into two @@ -239,237 +58,12 @@ def: vcmp_vi1_pat; // This is expanded by ExpandPostRAPseudos. let isPseudo = 1 in def PS_vmulw : PseudoM<(outs DoubleRegs:$Rd), - (ins DoubleRegs:$Rs, DoubleRegs:$Rt), "", - [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>; + (ins DoubleRegs:$Rs, DoubleRegs:$Rt), "", []>; let isPseudo = 1 in def PS_vmulw_acc : PseudoM<(outs DoubleRegs:$Rd), - (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), "", - [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))], + (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), "", [], "$Rd = $Rx">; -// Adds two v4i8: Hexagon does not have an insn for this one, so we -// use the double add v8i8, and use only the low part of the result. -def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), - (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>; - -// Subtract two v4i8: Hexagon does not have an insn for this one, so we -// use the double sub v8i8, and use only the low part of the result. -def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), - (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>; - -// -// No 32 bit vector mux. 
-// -def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)), - (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>; -def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)), - (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>; - -// -// 64-bit vector mux. -// -def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)), - (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; -def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)), - (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>; -def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)), - (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; - -// -// No 32 bit vector compare. -// -def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), - (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>; -def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), - (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>; -def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), - (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>; - -def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), - (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>; -def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), - (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>; -def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), - (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>; - - -class InvertCmp_pat - : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)), - (InvMI Value:$Rt, Value:$Rs)>; - -// Map from a compare operation to the corresponding instruction with the -// order of operands reversed, e.g. x > y --> cmp.lt(y,x). -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; - -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; - -// Map from vcmpne(Rss) -> !vcmpew(Rss). -// rs != rt -> !(rs == rt). 
-def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), - (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; - - -// Truncate: from vector B copy all 'E'ven 'B'yte elements: -// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6]; -def: Pat<(v4i8 (trunc V4I16:$Rs)), - (S2_vtrunehb V4I16:$Rs)>; - -// Truncate: from vector B copy all 'O'dd 'B'yte elements: -// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7]; -// S2_vtrunohb - -// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements: -// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2]; -// S2_vtruneh - -def: Pat<(v2i16 (trunc V2I32:$Rs)), - (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; - - -def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>; -def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>; - -def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>; -def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>; - -def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; -def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; -def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; -def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; -def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; -def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; - -// Sign extends a v2i8 into a v2i32. -def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), - (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; - -// Sign extends a v2i16 into a v2i32. -def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)), - (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>; - - -// Multiplies two v2i16 and returns a v2i32. We are using here the -// saturating multiply, as hexagon does not provide a non saturating -// vector multiply, and saturation does not impact the result that is -// in double precision of the operands. 
- -// Multiplies two v2i16 vectors: as Hexagon does not have a multiply -// with the C semantics for this one, this pattern uses the half word -// multiply vmpyh that takes two v2i16 and returns a v2i32. This is -// then truncated to fit this back into a v2i16 and to simulate the -// wrap around semantics for unsigned in C. -def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt), - (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>; - -def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), - (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)), - (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>; - -// Multiplies two v4i16 vectors. -def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), - (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)), - (vmpyh (LoReg $Rs), (LoReg $Rt)))>; - -def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt), - (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))), - (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>; - -// Multiplies two v4i8 vectors. -def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), - (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>, - Requires<[HasV5T]>; - -def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), - (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>; - -// Multiplies two v8i8 vectors. 
-def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), - (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))), - (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>, - Requires<[HasV5T]>; - -def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), - (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), - (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; - - -class shuffler - : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c), - "$a = " # Str # "($b, $c)", - [(set (i64 DoubleRegs:$a), - (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))], - "", S_3op_tc_1_SLOT23>; - -def SDTHexagonBinOp64 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>; - -def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>; -def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>; -def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>; -def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>; - -class ShufflePat - : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)), - (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b -def: ShufflePat; - -// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b -def: ShufflePat; - -// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h -def: ShufflePat; - -// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h -def: ShufflePat; - - -// Truncated store from v4i16 to v4i8. -def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), - [{ return cast(N)->getMemoryVT() == MVT::v4i8; }]>; - -// Truncated store from v2i32 to v2i16. 
-def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), - [{ return cast(N)->getMemoryVT() == MVT::v2i16; }]>; - -def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt), - (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs), - (LoReg $Rs))))>; - -def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt), - (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>; - - -// Zero and sign extended load from v2i8 into v2i16. -def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), - [{ return cast(N)->getMemoryVT() == MVT::v2i8; }]>; - -def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), - [{ return cast(N)->getMemoryVT() == MVT::v2i8; }]>; - -def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)), - (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>; - -def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)), - (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>; -def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)), - (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>; -def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)), - (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>; diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td new file mode 100644 index 0000000..2dfe157 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -0,0 +1,3273 @@ +// Pattern fragment that combines the value type and the register class +// into a single parameter. +// The pat frags in the definitions below need to have a named register, +// otherwise i32 will be assumed regardless of the register class. The +// name of the register does not matter. +def I1 : PatLeaf<(i1 PredRegs:$R)>; +def I32 : PatLeaf<(i32 IntRegs:$R)>; +def I64 : PatLeaf<(i64 DoubleRegs:$R)>; +def F32 : PatLeaf<(f32 IntRegs:$R)>; +def F64 : PatLeaf<(f64 DoubleRegs:$R)>; + +// Pattern fragments to extract the low and high subregisters from a +// 64-bit value. 
+def LoReg: OutPatFrag<(ops node:$Rs), + (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>; +def HiReg: OutPatFrag<(ops node:$Rs), + (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>; + +def orisadd: PatFrag<(ops node:$Addr, node:$off), + (or node:$Addr, node:$off), [{ return orIsAdd(N); }]>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_SIGNED : SDNodeXFormgetSExtValue(); + return XformSToSM1Imm(imm, SDLoc(N)); +}]>; + +// SDNode for converting immediate C to C-2. +def DEC2_CONST_SIGNED : SDNodeXFormgetSExtValue(); + return XformSToSM2Imm(imm, SDLoc(N)); +}]>; + +// SDNode for converting immediate C to C-3. +def DEC3_CONST_SIGNED : SDNodeXFormgetSExtValue(); + return XformSToSM3Imm(imm, SDLoc(N)); +}]>; + +// SDNode for converting unsigned immediate C to C-1. +def DEC_CONST_UNSIGNED : SDNodeXFormgetZExtValue(); + return XformUToUM1Imm(imm, SDLoc(N)); +}]>; + +class T_CMP_pat + : Pat<(i1 (OpNode (i32 IntRegs:$src1), ImmPred:$src2)), + (MI IntRegs:$src1, ImmPred:$src2)>; + +def : T_CMP_pat ; +def : T_CMP_pat ; +def : T_CMP_pat ; + +def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, + [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; + +def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; +def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; + +// Pats for instruction selection. +class BinOp32_pat + : Pat<(ResT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), + (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>; + +def: BinOp32_pat; +def: BinOp32_pat; +def: BinOp32_pat; +def: BinOp32_pat; +def: BinOp32_pat; + +def: BinOp32_pat; +def: BinOp32_pat; + +// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones +// that reverse the order of the operands. +class RevCmp : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>; + +// Pats for compares. They use PatFrags as operands, not SDNodes, +// since seteq/setgt/etc. are defined as PatFrags. 
+class T_cmp32_rr_pat + : Pat<(VT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt))>; + +def: T_cmp32_rr_pat; +def: T_cmp32_rr_pat; +def: T_cmp32_rr_pat; + +def: T_cmp32_rr_pat, i1>; +def: T_cmp32_rr_pat, i1>; + +def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), + (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>; + +def: Pat<(i32 (add I32:$Rs, s32_0ImmPred:$s16)), + (i32 (A2_addi I32:$Rs, imm:$s16))>; + +def: Pat<(or (i32 IntRegs:$Rs), s32_0ImmPred:$s10), + (A2_orir IntRegs:$Rs, imm:$s10)>; +def: Pat<(and (i32 IntRegs:$Rs), s32_0ImmPred:$s10), + (A2_andir IntRegs:$Rs, imm:$s10)>; + +def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs), + (A2_subri imm:$s10, IntRegs:$Rs)>; + +// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). +def: Pat<(not (i32 IntRegs:$src1)), + (A2_subri -1, IntRegs:$src1)>; + +def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>; +def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi imm:$s8)>; + +def : Pat<(i32 (select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs)), + (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>; + +def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32_0ImmPred:$s8)), + (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>; + +def : Pat<(i32 (select I1:$Pu, s32_0ImmPred:$s8, s8_0ImmPred:$S8)), + (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>; + +def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>; +def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>; +def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>; +def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>; + +class T_vcmp_pat + : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))), + (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>; + +def: T_vcmp_pat; +def: T_vcmp_pat; +def: T_vcmp_pat; +def: T_vcmp_pat; +def: T_vcmp_pat; +def: T_vcmp_pat; +def: T_vcmp_pat; +def: T_vcmp_pat; + +// Add halfword. 
+def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16), + (A2_addh_l16_ll I32:$src1, I32:$src2)>; + +def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)), + (A2_addh_l16_hl I32:$src1, I32:$src2)>; + +def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)), + (A2_addh_h16_ll I32:$src1, I32:$src2)>; + +// Subtract halfword. +def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16), + (A2_subh_l16_ll I32:$src1, I32:$src2)>; + +def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)), + (A2_subh_h16_ll I32:$src1, I32:$src2)>; + +// Here, depending on the operand being selected, we'll either generate a +// min or max instruction. +// Ex: +// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected +// is the larger of two. So, the corresponding HexagonInst is passed in 'Inst'. +// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value +// is selected and the corresponding HexagonInst is passed in 'SwapInst'. + +multiclass T_MinMax_pats { + def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))), + (VT RC:$src1), (VT RC:$src2)), + (Inst RC:$src1, RC:$src2)>; + def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))), + (VT RC:$src2), (VT RC:$src1)), + (SwapInst RC:$src1, RC:$src2)>; +} + + +multiclass MinMax_pats { + defm: T_MinMax_pats; + + def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1), + (i32 PositiveHalfWord:$src2))), + (i32 PositiveHalfWord:$src1), + (i32 PositiveHalfWord:$src2))), i16), + (Inst IntRegs:$src1, IntRegs:$src2)>; + + def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1), + (i32 PositiveHalfWord:$src2))), + (i32 PositiveHalfWord:$src2), + (i32 PositiveHalfWord:$src1))), i16), + (SwapInst IntRegs:$src1, IntRegs:$src2)>; +} + +let AddedComplexity = 200 in { + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; +} + +class T_cmp64_rr_pat + : Pat<(i1 (CmpOp (i64 
DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))), + (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>; + +def: T_cmp64_rr_pat; +def: T_cmp64_rr_pat; +def: T_cmp64_rr_pat; +def: T_cmp64_rr_pat>; +def: T_cmp64_rr_pat>; + +def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>; +def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>; + +def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>; +def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>; +def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>; + +def: Pat<(i1 (not (i1 PredRegs:$Ps))), + (C2_not PredRegs:$Ps)>; + +def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>; +def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>; + +def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; + +def: Pat<(br bb:$dst), + (J2_jump brtarget:$dst)>; +def: Pat<(retflag), + (PS_jmpret (i32 R31))>; +def: Pat<(brcond (i1 PredRegs:$src1), bb:$offset), + (J2_jumpt PredRegs:$src1, bb:$offset)>; + +def: Pat<(eh_return), + (EH_RETURN_JMPR (i32 R31))>; +def: Pat<(brind (i32 IntRegs:$dst)), + (J2_jumpr IntRegs:$dst)>; + +// Patterns to select load-indexed (i.e. load from base+offset). 
+multiclass Loadx_pat { + def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; + def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; + def: Pat<(VT (Load (orisadd (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; + def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))), + (VT (MI IntRegs:$Rs, imm:$Off))>; + def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>; +} + +let AddedComplexity = 20 in { + defm: Loadx_pat; + defm: Loadx_pat; + defm: Loadx_pat; + defm: Loadx_pat; + defm: Loadx_pat; + defm: Loadx_pat; + + defm: Loadx_pat; + defm: Loadx_pat; + defm: Loadx_pat; + defm: Loadx_pat; + defm: Loadx_pat; + defm: Loadx_pat; + defm: Loadx_pat; + defm: Loadx_pat; + // No sextloadi1. +} + +// Sign-extending loads of i1 need to replicate the lowest bit throughout +// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should +// do the trick. +let AddedComplexity = 20 in +def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))), + (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; + +def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>; +def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>; +def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>; + +def: Pat<(mul IntRegs:$Rs, u32_0ImmPred:$u8), + (M2_mpysip IntRegs:$Rs, imm:$u8)>; +def: Pat<(ineg (mul IntRegs:$Rs, u8_0ImmPred:$u8)), + (M2_mpysin IntRegs:$Rs, imm:$u8)>; +def: Pat<(mul IntRegs:$src1, s32_0ImmPred:$src2), + (M2_mpysmi IntRegs:$src1, imm:$src2)>; +def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), + (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>; +def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1), + (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), + (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>; +def: Pat<(add (add I32:$src2, I32:$src3), 
I32:$src1), + (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +class T_MType_acc_pat1 + : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)), + (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>; + +class T_MType_acc_pat2 + : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))), + (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def : T_MType_acc_pat2 ; +def : T_MType_acc_pat1 ; + +def : T_MType_acc_pat1 ; +def : T_MType_acc_pat2 ; + +def: T_MType_acc_pat2 ; +def: T_MType_acc_pat2 ; +def: T_MType_acc_pat2 ; +def: T_MType_acc_pat2 ; +def: T_MType_acc_pat2 ; +def: T_MType_acc_pat2 ; +def: T_MType_acc_pat2 ; +def: T_MType_acc_pat2 ; + +class T_MType_acc_pat3 + : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, + (not IntRegs:$src3)))), + (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>; + +def: T_MType_acc_pat3 ; +def: T_MType_acc_pat3 ; +def: T_MType_acc_pat3 ; + +def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))), + (i64 (anyext (i32 IntRegs:$src2))))), + (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>; + +def: Pat<(i64 (mul (i64 (sext (i32 IntRegs:$src1))), + (i64 (sext (i32 IntRegs:$src2))))), + (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>; + +def: Pat<(i64 (mul (is_sext_i32:$src1), + (is_sext_i32:$src2))), + (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>; + +// Multiply and accumulate, use full result. 
+// Rxx[+-]=mpy(Rs,Rt) + +def: Pat<(i64 (add (i64 DoubleRegs:$src1), + (mul (i64 (sext (i32 IntRegs:$src2))), + (i64 (sext (i32 IntRegs:$src3)))))), + (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (sub (i64 DoubleRegs:$src1), + (mul (i64 (sext (i32 IntRegs:$src2))), + (i64 (sext (i32 IntRegs:$src3)))))), + (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (add (i64 DoubleRegs:$src1), + (mul (i64 (anyext (i32 IntRegs:$src2))), + (i64 (anyext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (add (i64 DoubleRegs:$src1), + (mul (i64 (zext (i32 IntRegs:$src2))), + (i64 (zext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (sub (i64 DoubleRegs:$src1), + (mul (i64 (anyext (i32 IntRegs:$src2))), + (i64 (anyext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +def: Pat<(i64 (sub (i64 DoubleRegs:$src1), + (mul (i64 (zext (i32 IntRegs:$src2))), + (i64 (zext (i32 IntRegs:$src3)))))), + (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; + +class Storepi_pat + : Pat<(Store Value:$src1, I32:$src2, Offset:$offset), + (MI I32:$src2, imm:$offset, Value:$src1)>; + +def: Storepi_pat; +def: Storepi_pat; +def: Storepi_pat; +def: Storepi_pat; + +// Patterns for generating stores, where the address takes different forms: +// - frameindex, +// - frameindex + offset, +// - base + offset, +// - simple (base address without offset). +// These would usually be used together (via Storex_pat defined below), but +// in some cases one may want to apply different properties (such as +// AddedComplexity) to the individual patterns. 
+class Storex_fi_pat + : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; +multiclass Storex_fi_add_pat { + def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; + def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; +} +multiclass Storex_add_pat { + def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; + def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; +} +class Storex_simple_pat + : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), + (MI IntRegs:$Rs, 0, Value:$Rt)>; + +// Patterns for generating stores, where the address takes different forms, +// and where the value being stored is transformed through the value modifier +// ValueMod. The address forms are same as above. +class Storexm_fi_pat + : Pat<(Store Value:$Rs, AddrFI:$fi), + (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; +multiclass Storexm_fi_add_pat { + def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; + def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; +} +multiclass Storexm_add_pat { + def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; + def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; +} +class Storexm_simple_pat + : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), + (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>; + +multiclass Storex_pat { + def: Storex_fi_pat ; + defm: Storex_fi_add_pat ; + defm: Storex_add_pat ; +} + +multiclass Storexm_pat { + def: Storexm_fi_pat ; + defm: Storexm_fi_add_pat ; + defm: Storexm_add_pat ; +} + +// Regular stores in the DAG have two operands: value and address. 
+// Atomic stores also have two, but they are reversed: address, value. +// To use atomic stores with the patterns, they need to have their operands +// swapped. This relies on the knowledge that the F.Fragment uses names +// "ptr" and "val". +class SwapSt + : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, + F.OperandTransform>; + +let AddedComplexity = 20 in { + defm: Storex_pat; + defm: Storex_pat; + defm: Storex_pat; + defm: Storex_pat; + + defm: Storex_pat, I32, s32_0ImmPred, S2_storerb_io>; + defm: Storex_pat, I32, s31_1ImmPred, S2_storerh_io>; + defm: Storex_pat, I32, s30_2ImmPred, S2_storeri_io>; + defm: Storex_pat, I64, s29_3ImmPred, S2_storerd_io>; +} + +// Simple patterns should be tried with the least priority. +def: Storex_simple_pat; +def: Storex_simple_pat; +def: Storex_simple_pat; +def: Storex_simple_pat; + +def: Storex_simple_pat, I32, S2_storerb_io>; +def: Storex_simple_pat, I32, S2_storerh_io>; +def: Storex_simple_pat, I32, S2_storeri_io>; +def: Storex_simple_pat, I64, S2_storerd_io>; + +let AddedComplexity = 20 in { + defm: Storexm_pat; + defm: Storexm_pat; + defm: Storexm_pat; +} + +def: Storexm_simple_pat; +def: Storexm_simple_pat; +def: Storexm_simple_pat; + +def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>; + +def: Pat<(i32 (select (i1 (setlt (i32 IntRegs:$src), 0)), + (i32 (sub 0, (i32 IntRegs:$src))), + (i32 IntRegs:$src))), + (A2_abs IntRegs:$src)>; + +let AddedComplexity = 50 in +def: Pat<(i32 (xor (add (sra (i32 IntRegs:$src), (i32 31)), + (i32 IntRegs:$src)), + (sra (i32 IntRegs:$src), (i32 31)))), + (A2_abs IntRegs:$src)>; + +def: Pat<(sra (i32 IntRegs:$src), u5_0ImmPred:$u5), + (S2_asr_i_r IntRegs:$src, imm:$u5)>; +def: Pat<(srl (i32 IntRegs:$src), u5_0ImmPred:$u5), + (S2_lsr_i_r IntRegs:$src, imm:$u5)>; +def: Pat<(shl (i32 IntRegs:$src), u5_0ImmPred:$u5), + (S2_asl_i_r IntRegs:$src, imm:$u5)>; + +def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5_0ImmPred:$src2)), + (i32 1))), + (i32 1))), + (S2_asr_i_r_rnd 
IntRegs:$src1, u5_0ImmPred:$src2)>; + +def : Pat<(not (i64 DoubleRegs:$src1)), + (A2_notp DoubleRegs:$src1)>; + +// Count leading zeros. +def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; + +// Count trailing zeros: 32-bit. +def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>; + +// Count leading ones. +def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; + +// Count trailing ones: 32-bit. +def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>; + +def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5_0ImmPred:$u5)))), + (S2_clrbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, u5_0ImmPred:$u5))), + (S2_setbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, u5_0ImmPred:$u5))), + (S2_togglebit_i IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, (i32 IntRegs:$Rt))))), + (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))), + (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))), + (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>; + +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. + def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), + (S2_tstbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>; + def: Pat<(i1 (setne (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)), + (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (trunc (i32 IntRegs:$Rs))), + (S2_tstbit_i IntRegs:$Rs, 0)>; + def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))), + (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; +} + +let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. 
+ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6_0ImmPred:$u6), 0)), + (C2_bitsclri IntRegs:$Rs, u6_0ImmPred:$u6)>; + def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 0)), + (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>; +} + +let AddedComplexity = 10 in // Complexity greater than compare reg-reg. +def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)), + (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>; + +def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))), + (i32 8)), + (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))), + (i32 16)), + (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))), + (zextloadi8 (i32 IntRegs:$b))), + (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; + +// Patterns for loads of i1: +def: Pat<(i1 (load AddrFI:$fi)), + (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; +def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32_0ImmPred:$Off))), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; +def: Pat<(i1 (load (i32 IntRegs:$Rs))), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; + +def I1toI32: OutPatFrag<(ops node:$Rs), + (C2_muxii (i1 $Rs), 1, 0)>; + +def I32toI1: OutPatFrag<(ops node:$Rs), + (i1 (C2_tfrrp (i32 $Rs)))>; + +defm: Storexm_pat; +def: Storexm_simple_pat; + +def: Pat<(sra (i64 DoubleRegs:$src), u6_0ImmPred:$u6), + (S2_asr_i_p DoubleRegs:$src, imm:$u6)>; +def: Pat<(srl (i64 DoubleRegs:$src), u6_0ImmPred:$u6), + (S2_lsr_i_p DoubleRegs:$src, imm:$u6)>; +def: Pat<(shl (i64 DoubleRegs:$src), u6_0ImmPred:$u6), + (S2_asl_i_p DoubleRegs:$src, imm:$u6)>; + +let AddedComplexity = 100 in +def: Pat<(add (i32 IntRegs:$Rt), (shl (i32 IntRegs:$Rs), u3_0ImmPred:$u3)), + (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>; + +def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; +def: Pat<(HexagonBARRIER), (Y2_barrier)>; + +def: Pat<(orisadd (i32 AddrFI:$Rs), s32_0ImmPred:$off), + (PS_fi (i32 AddrFI:$Rs), s32_0ImmPred:$off)>; + + +// Support for generating global address. 
+// Taken from X86InstrInfo.td. +def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisPtrTy<0>]>; +def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; +def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; + +// Map TLS addresses to A2_tfrsi. +def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16_0Ext:$addr)>; +def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16_0Ext:$label)>; + +def: Pat<(i64 imm:$v), (CONST64 imm:$v)>; +def: Pat<(i1 0), (PS_false)>; +def: Pat<(i1 1), (PS_true)>; + +// Pseudo instructions. +def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; + +// For tail calls, the HexagonTCRet SDNode has three SDNode properties: a chain, +// optional input glue, and variadic arguments. +// Its single operand is constrained to i32 by SDT_SPCall. +def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + + +def: Pat<(callseq_start timm:$amt), + (ADJCALLSTACKDOWN imm:$amt)>; +def: Pat<(callseq_end timm:$amt1, timm:$amt2), + (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>; + +// Tail calls. +def: Pat<(HexagonTCRet tglobaladdr:$dst), + (PS_tailcall_i tglobaladdr:$dst)>; +def: Pat<(HexagonTCRet texternalsym:$dst), + (PS_tailcall_i texternalsym:$dst)>; +def: Pat<(HexagonTCRet I32:$dst), + (PS_tailcall_r I32:$dst)>; + +// Map from r0 = and(r1, 65535) to r0 = zxth(r1). +def: Pat<(and (i32 IntRegs:$src1), 65535), + (A2_zxth IntRegs:$src1)>; + +// Map from r0 = and(r1, 255) to r0 = zxtb(r1). +def: Pat<(and (i32 IntRegs:$src1), 255), + (A2_zxtb IntRegs:$src1)>; + +// Map Add(p1, true) to p1 = not(p1).
+// Add(p1, false) should never be produced; +// if it is, it has to be mapped to a NOOP. +def: Pat<(add (i1 PredRegs:$src1), -1), + (C2_not PredRegs:$src1)>; + +// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). +def: Pat<(select (not (i1 PredRegs:$src1)), s8_0ImmPred:$src2, s32_0ImmPred:$src3), + (C2_muxii PredRegs:$src1, s32_0ImmPred:$src3, s8_0ImmPred:$src2)>; + +// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) +// => r0 = C2_muxir(p0, r1, #i) +def: Pat<(select (not (i1 PredRegs:$src1)), s32_0ImmPred:$src2, + (i32 IntRegs:$src3)), + (C2_muxir PredRegs:$src1, IntRegs:$src3, s32_0ImmPred:$src2)>; + +// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) +// => r0 = C2_muxri (p0, #i, r1) +def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32_0ImmPred:$src3), + (C2_muxri PredRegs:$src1, s32_0ImmPred:$src3, IntRegs:$src2)>; + +// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. +def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset), + (J2_jumpf PredRegs:$src1, bb:$offset)>; + +// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)), + (A2_sxtw (LoReg DoubleRegs:$src1))>; + +// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)), + (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>; + +// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)). +def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)), + (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; + +// We want to prevent emitting pnot's as much as possible. +// Map brcond with an unsupported setcc to a J2_jumpf.
+def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + bb:$offset), + (J2_jumpf (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + bb:$offset)>; + +def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10_0ImmPred:$src2)), + bb:$offset), + (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10_0ImmPred:$src2), bb:$offset)>; + +def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), + (J2_jumpf PredRegs:$src1, bb:$offset)>; + +def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), + (J2_jumpt PredRegs:$src1, bb:$offset)>; + +// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) +def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8_0ImmPred:$src2)), bb:$offset), + (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8_0ImmPred:$src2)), + bb:$offset)>; + +// Map from a 64-bit select to an emulated 64-bit mux. +// Hexagon does not support 64-bit MUXes; so emulate with combines. +def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src3)), + (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2), + (HiReg DoubleRegs:$src3)), + (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2), + (LoReg DoubleRegs:$src3)))>; + +// Map from a 1-bit select to logical ops. +// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3). +def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)), + (C2_or (C2_and PredRegs:$src1, PredRegs:$src2), + (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>; + +// Truncate a 64-bit register value to 32 bits by taking its low subregister. +def: Pat<(i32 (trunc (i64 DoubleRegs:$src))), + (LoReg DoubleRegs:$src)>; + +// Truncate a 64-bit register value to i1 by applying C2_tfrrp to its low +// subregister. +def: Pat<(i1 (trunc (i64 DoubleRegs:$src))), + (C2_tfrrp (LoReg DoubleRegs:$src))>; + +// rs <= rt -> !(rs > rt).
+let AddedComplexity = 30 in +def: Pat<(i1 (setle (i32 IntRegs:$src1), s32_0ImmPred:$src2)), + (C2_not (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2))>; + +// rs <= rt -> !(rs > rt). +def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; + +// Rss <= Rtt -> !(Rss > Rtt). +def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Map cmpne -> !cmpeq. +// Hexagon_TODO: We should improve on this. +// rs != rt -> !(rs == rt). +let AddedComplexity = 30 in +def: Pat<(i1 (setne (i32 IntRegs:$src1), s32_0ImmPred:$src2)), + (C2_not (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2))>; + +// Convert setne back to xor for hexagon since we compute w/ pred registers. +def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))), + (C2_xor PredRegs:$src1, PredRegs:$src2)>; + +// Map cmpne(Rss, Rtt) -> !cmpeq(Rss, Rtt). +// rs != rt -> !(rs == rt). +def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; + +// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs). +// rs >= rt -> !(rt > rs). +def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>; + +// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) +let AddedComplexity = 30 in +def: Pat<(i1 (setge (i32 IntRegs:$src1), s32_0ImmPred:$src2)), + (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32_0ImmPred:$src2))>; + +// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). +// rss >= rtt -> !(rtt > rss). +def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>; + +// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). +// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1). +// rs < rt -> !(rs >= rt).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
+         (C2_not (C2_cmpgti IntRegs:$src1,
+                            (DEC_CONST_SIGNED s32_0ImmPred:$src2)))>;
+
+// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
+def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)),
+         (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;
+
+// Generate cmpgeu(Rs, #u) -> cmpgtu(Rs, #u-1)
+// NOTE(review): #u-1 is only meaningful for #u != 0; this presumably relies
+// on the #0 pattern above being selected for a zero immediate -- confirm.
+def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32_0ImmPred:$src2)),
+         (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32_0ImmPred:$src2))>;
+
+// Generate cmpgtu(Rs, #u9)
+def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32_0ImmPred:$src2)),
+         (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>;
+
+// Map from cmpgeu(Rss, Rtt) -> !cmpgtu(Rtt, Rss).
+// rs >= rt -> !(rt > rs).
+def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+         (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+         (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Sign extends.
+// i1 -> i32
+def: Pat<(i32 (sext (i1 PredRegs:$src1))),
+         (C2_muxii PredRegs:$src1, -1, 0)>;
+
+// i1 -> i64
+// Both halves must be selected by the predicate: a false predicate has to
+// sign-extend to 0, so the high word cannot be an unconditional -1.
+def: Pat<(i64 (sext (i1 PredRegs:$src1))),
+         (A2_combinew (C2_muxii PredRegs:$src1, -1, 0),
+                      (C2_muxii PredRegs:$src1, -1, 0))>;
+
+// Zero extends.
+// i1 -> i32
+def: Pat<(i32 (zext (i1 PredRegs:$src1))),
+         (C2_muxii PredRegs:$src1, 1, 0)>;
+
+// Map from Rd = Pd to Rd = mux(Pd, #1, #0)
+def: Pat<(i32 (anyext (i1 PredRegs:$src1))),
+         (C2_muxii PredRegs:$src1, 1, 0)>;
+
+// Map from Rdd = Pd to Rdd = sxtw (mux(Pd, #1, #0))
+def: Pat<(i64 (anyext (i1 PredRegs:$src1))),
+         (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
+
+// Clear the sign bit in a 64-bit register.
+def ClearSign : OutPatFrag<(ops node:$Rss), + (A2_combinew (S2_clrbit_i (HiReg $Rss), 31), (LoReg $Rss))>; + +def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt), + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32), + (HiReg $Rss), + (LoReg $Rtt)), + (A2_combinew (A2_tfrsi 0), + (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))), + 32), + (HiReg $Rss), + (HiReg $Rtt)), + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>; + +// Multiply 64-bit unsigned and use upper result. +// MulHU assembles the upper 64 bits from 32x32->64 partial products (reading +// M2_dpmpyuu_s0 as an unsigned 32x32->64 multiply). With Rss = Ah:Al and +// Rtt = Bh:Bl it computes +// Ah*Bh + ((Ah*Bl + (Al*Bl >> 32) + lo32(Al*Bh)) >> 32) + (Al*Bh >> 32). +def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>; + +// Multiply 64-bit signed and use upper result. +// +// For two signed 64-bit integers A and B, let A' and B' denote A and B +// with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the +// sign bit of A (and identically for B). With this notation, the signed +// product A*B can be written as: +// AB = (-2^63 s(A) + A') * (-2^63 s(B) + B') +// = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B' +// = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A'] +// = (unsigned product AB) - 2^64 [s(A)B'+s(B)A'] +// Hence mulhs = mulhu - [s(A)B'+s(B)A']. The pattern forms s(X)*Y' as +// (X arithmetically shifted right by 63) AND ClearSign(Y). + +def : Pat <(mulhs I64:$Rss, I64:$Rtt), + (A2_subp + (MulHU $Rss, $Rtt), + (A2_addp + (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)), + (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>; + +// Hexagon specific ISD nodes.
+def SDTHexagonALLOCA : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, + [SDNPHasChain]>; + + +def: Pat<(HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)), + (PS_alloca IntRegs:$Rs, imm:$A)>; + +def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; +def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; + +def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi imm:$dst)>; +def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi imm:$dst)>; + +let AddedComplexity = 100 in +def: Pat<(add (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(sub (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(and (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(or (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; + +let AddedComplexity = 100 in +def: Pat<(add (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(sub (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(and (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(or (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; + +let AddedComplexity = 100 in +def: Pat<(add (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(sub (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), 
u5_0ImmPred:$u5)), (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(and (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(or (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +let AddedComplexity = 100 in +def: Pat<(xor (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; + +let AddedComplexity = 100 in +def: Pat<(add (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(sub (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(and (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(or (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +let AddedComplexity = 100 in +def: Pat<(xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; + +let AddedComplexity = 100 in +def: Pat<(add (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(sub (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(and (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asl_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(or (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asl_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +let AddedComplexity = 100 in 
+def: Pat<(xor (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; + +let AddedComplexity = 100 in +def: Pat<(add (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(sub (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(and (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +def: Pat<(or (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +let AddedComplexity = 100 in +def: Pat<(xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; + +let AddedComplexity = 100 in +def: Pat<(add (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +let AddedComplexity = 100 in +def: Pat<(add (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), 
(S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; + +let AddedComplexity = 100 in +def: Pat<(add (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +let AddedComplexity = 100 in +def: Pat<(add (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(xor (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; + +let AddedComplexity = 100 in +def: Pat<(add (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), 
(S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +let AddedComplexity = 100 in +def: Pat<(add (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; + +let AddedComplexity = 100 in +def: Pat<(add (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; +let AddedComplexity = 100 in +def: Pat<(add (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(sub (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_p_nac 
DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(and (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(or (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; + +def: Pat<(sra (i64 DoubleRegs:$src1), (i32 IntRegs:$src2)), (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>; +def: Pat<(srl (i64 DoubleRegs:$src1), (i32 IntRegs:$src2)), (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>; +def: Pat<(shl (i64 DoubleRegs:$src1), (i32 IntRegs:$src2)), (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>; +def: Pat<(shl (i64 DoubleRegs:$src1), (i32 IntRegs:$src2)), (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>; + +def: Pat<(sra (i32 IntRegs:$src1), (i32 IntRegs:$src2)), (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(srl (i32 IntRegs:$src1), (i32 IntRegs:$src2)), (S2_lsr_r_r IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(shl (i32 IntRegs:$src1), (i32 IntRegs:$src2)), (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(shl (i32 IntRegs:$src1), (i32 IntRegs:$src2)), (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>; + +def SDTHexagonINSERT: + SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; +def SDTHexagonINSERTRP: + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i64>]>; + +def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; +def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; + +def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), + (S2_insert I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2)>; +def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), + (S2_insertp 
I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2)>; +def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), + (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; +def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), + (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; + +let AddedComplexity = 100 in +def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))), + (i32 (extloadi8 (add I32:$b, 3))), + 24, 8), + (i32 16)), + (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), + (zextloadi8 I32:$b)), + (A2_swiz (L2_loadri_io I32:$b, 0))>; + +def SDTHexagonEXTRACTU: + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDTHexagonEXTRACTURP: + SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i64>]>; + +def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; +def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; + +def: Pat<(HexagonEXTRACTU I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3), + (S2_extractu I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>; +def: Pat<(HexagonEXTRACTU I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3), + (S2_extractup I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>; +def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2), + (S2_extractu_rp I32:$src1, I64:$src2)>; +def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2), + (S2_extractup_rp I64:$src1, I64:$src2)>; + +// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) +def: Pat<(mul (i32 IntRegs:$src1), (ineg n8_0ImmPred:$src2)), + (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>; + +multiclass MinMax_pats_p { + defm: T_MinMax_pats; +} + +def: Pat<(add (i64 (sext (i32 IntRegs:$Rs))), (i64 DoubleRegs:$Rt)), + (A2_addsp IntRegs:$Rs, DoubleRegs:$Rt)>; + +let AddedComplexity = 200 in { + defm: MinMax_pats_p; + defm: MinMax_pats_p; + defm: MinMax_pats_p; + defm: MinMax_pats_p; + defm: MinMax_pats_p; + defm: MinMax_pats_p; + defm: MinMax_pats_p; + defm: MinMax_pats_p; +} + 
+def callv3 : SDNode<"HexagonISD::CALL", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +def callv3nr : SDNode<"HexagonISD::CALLnr", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + + +// Map call instruction +def : Pat<(callv3 I32:$dst), + (J2_callr I32:$dst)>; +def : Pat<(callv3 tglobaladdr:$dst), + (J2_call tglobaladdr:$dst)>; +def : Pat<(callv3 texternalsym:$dst), + (J2_call texternalsym:$dst)>; +def : Pat<(callv3 tglobaltlsaddr:$dst), + (J2_call tglobaltlsaddr:$dst)>; + +def : Pat<(callv3nr I32:$dst), + (PS_callr_nr I32:$dst)>; +def : Pat<(callv3nr tglobaladdr:$dst), + (PS_call_nr tglobaladdr:$dst)>; +def : Pat<(callv3nr texternalsym:$dst), + (PS_call_nr texternalsym:$dst)>; + + +def addrga: PatLeaf<(i32 AddrGA:$Addr)>; +def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; + +def BITPOS32 : SDNodeXFormgetSExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + + +// Pats for instruction selection. + +// A class to embed the usual comparison patfrags within a zext to i32. +// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same +// names, or else the frag's "body" won't match the operands. 
+class CmpInReg + : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>; + +def: T_cmp32_rr_pat, i32>; +def: T_cmp32_rr_pat, i32>; + +def: T_cmp32_rr_pat; +def: T_cmp32_rr_pat; +def: T_cmp32_rr_pat; + +def: T_cmp32_rr_pat, i1>; +def: T_cmp32_rr_pat, i1>; + +let AddedComplexity = 100 in { + def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 255), 0)), + (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 255), 0)), + (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 65535), 0)), + (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), + 65535), 0)), + (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; +} + +def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32_0ImmPred:$s8)))), + (A4_rcmpeqi IntRegs:$Rs, s32_0ImmPred:$s8)>; +def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32_0ImmPred:$s8)))), + (A4_rcmpneqi IntRegs:$Rs, s32_0ImmPred:$s8)>; + +// Preserve the S2_tstbit_r generation +def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))), + (i32 IntRegs:$src1))), 0)))), + (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>; + +// The complexity of the combines involving immediates should be greater +// than the complexity of the combine with two registers. +let AddedComplexity = 50 in { +def: Pat<(HexagonCOMBINE IntRegs:$r, s32_0ImmPred:$i), + (A4_combineri IntRegs:$r, s32_0ImmPred:$i)>; + +def: Pat<(HexagonCOMBINE s32_0ImmPred:$i, IntRegs:$r), + (A4_combineir s32_0ImmPred:$i, IntRegs:$r)>; +} + +// The complexity of the combine with two immediates should be greater than +// the complexity of a combine involving a register. 
+let AddedComplexity = 75 in { +def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, u32_0ImmPred:$u6), + (A4_combineii imm:$s8, imm:$u6)>; +def: Pat<(HexagonCOMBINE s32_0ImmPred:$s8, s8_0ImmPred:$S8), + (A2_combineii imm:$s8, imm:$S8)>; +} + + +def Zext64: OutPatFrag<(ops node:$Rs), + (i64 (A4_combineir 0, (i32 $Rs)))>; +def Sext64: OutPatFrag<(ops node:$Rs), + (i64 (A2_sxtw (i32 $Rs)))>; + +// Patterns to generate indexed loads with different forms of the address: +// - frameindex, +// - base + offset, +// - base (without offset). +multiclass Loadxm_pat { + def: Pat<(VT (Load AddrFI:$fi)), + (VT (ValueMod (MI AddrFI:$fi, 0)))>; + def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))), + (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; + def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))), + (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; + def: Pat<(VT (Load (i32 IntRegs:$Rs))), + (VT (ValueMod (MI IntRegs:$Rs, 0)))>; +} + +defm: Loadxm_pat; +defm: Loadxm_pat; +defm: Loadxm_pat; +defm: Loadxm_pat; +defm: Loadxm_pat; +defm: Loadxm_pat; +defm: Loadxm_pat; +defm: Loadxm_pat; + +// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs). 
+def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>; + +multiclass T_LoadAbsReg_Pat { + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3)))), + (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tglobaladdr:$src2)))), + (MI IntRegs:$src1, 0, tglobaladdr:$src2)>; + + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), + (HexagonCONST32 tconstpool:$src3)))), + (MI IntRegs:$src1, u2_0ImmPred:$src2, tconstpool:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tconstpool:$src2)))), + (MI IntRegs:$src1, 0, tconstpool:$src2)>; + + def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), + (HexagonCONST32 tjumptable:$src3)))), + (MI IntRegs:$src1, u2_0ImmPred:$src2, tjumptable:$src3)>; + def : Pat <(VT (ldOp (add IntRegs:$src1, + (HexagonCONST32 tjumptable:$src2)))), + (MI IntRegs:$src1, 0, tjumptable:$src2)>; +} + +let AddedComplexity = 60 in { +defm : T_LoadAbsReg_Pat ; +defm : T_LoadAbsReg_Pat ; +defm : T_LoadAbsReg_Pat ; + +defm : T_LoadAbsReg_Pat ; +defm : T_LoadAbsReg_Pat ; +defm : T_LoadAbsReg_Pat ; + +defm : T_LoadAbsReg_Pat ; +defm : T_LoadAbsReg_Pat ; +} + +// 'def pats' for load instructions with base + register offset and non-zero +// immediate value. Immediate value is used to left-shift the second +// register operand. +class Loadxs_pat + : Pat<(VT (Load (add (i32 IntRegs:$Rs), + (i32 (shl (i32 IntRegs:$Rt), u2_0ImmPred:$u2))))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; + +let AddedComplexity = 40 in { + def: Loadxs_pat; + def: Loadxs_pat; + def: Loadxs_pat; + def: Loadxs_pat; + def: Loadxs_pat; + def: Loadxs_pat; + def: Loadxs_pat; + def: Loadxs_pat; +} + +// 'def pats' for load instruction base + register offset and +// zero immediate value. 
+class Loadxs_simple_pat + : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; + +let AddedComplexity = 20 in { + def: Loadxs_simple_pat; + def: Loadxs_simple_pat; + def: Loadxs_simple_pat; + def: Loadxs_simple_pat; + def: Loadxs_simple_pat; + def: Loadxs_simple_pat; + def: Loadxs_simple_pat; + def: Loadxs_simple_pat; +} + +// zext i1->i64 +def: Pat<(i64 (zext (i1 PredRegs:$src1))), + (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>; + +// zext i32->i64 +def: Pat<(i64 (zext (i32 IntRegs:$src1))), + (Zext64 IntRegs:$src1)>; + +let AddedComplexity = 40 in +multiclass T_StoreAbsReg_Pats { + def : Pat<(stOp (VT RC:$src4), + (add (shl (i32 IntRegs:$src1), u2_0ImmPred:$src2), + u32_0ImmPred:$src3)), + (MI IntRegs:$src1, u2_0ImmPred:$src2, u32_0ImmPred:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add (shl IntRegs:$src1, u2_0ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3))), + (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))), + (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; +} + +defm : T_StoreAbsReg_Pats ; +defm : T_StoreAbsReg_Pats ; +defm : T_StoreAbsReg_Pats ; +defm : T_StoreAbsReg_Pats ; + +class Storexs_pat + : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs), + (i32 (shl (i32 IntRegs:$Rt), u2_0ImmPred:$u2)))), + (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; + +let AddedComplexity = 40 in { + def: Storexs_pat; + def: Storexs_pat; + def: Storexs_pat; + def: Storexs_pat; +} + +def s30_2ProperPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<30,2>(v) && !isShiftedInt<29,3>(v); +}]>; +def RoundTo8 : SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(Imm & -8, SDLoc(N), MVT::i32); +}]>; + +let AddedComplexity = 40 in +def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)), + (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), 
I64:$Ru)>; + +class Store_rr_pat + : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)), + (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>; + +let AddedComplexity = 20 in { + def: Store_rr_pat; + def: Store_rr_pat; + def: Store_rr_pat; + def: Store_rr_pat; +} + + +def IMM_BYTE : SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; + +def IMM_HALF : SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; + +def IMM_WORD : SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; + +def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>; +def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>; +def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>; + +// Emit store-immediate, but only when the stored value will not be constant- +// extended. The reason for that is that there is no pass that can optimize +// constant extenders in store-immediate instructions. In some cases we can +// end up with a number of such stores, all of which store the same extended +// value (e.g. after unrolling a loop that initializes a floating-point array). + +// Predicate to determine if the 16-bit immediate is expressible as a sign- +// extended 8-bit immediate. Store-immediate-halfword will ignore any bits +// beyond 0..15, so we don't care what is in there. + +def i16in8ImmPred: PatLeaf<(i32 imm), [{ + int64_t v = (int16_t)N->getSExtValue(); + return v == (int64_t)(int8_t)v; +}]>; + +// Predicate to determine if the 32-bit immediate is expressible as a sign- +// extended 8-bit immediate. +def i32in8ImmPred: PatLeaf<(i32 imm), [{ + int64_t v = (int32_t)N->getSExtValue(); + return v == (int64_t)(int8_t)v; +}]>; + + +let AddedComplexity = 40 in { + // Even though the offset is not extendable in the store-immediate, we + // can still generate the fi# in the base address. If the final offset + // is not valid for the instruction, we will replace it with a scratch + // register. 
+// def: Storexm_fi_pat ; +// def: Storexm_fi_pat ; +// def: Storexm_fi_pat ; + +// defm: Storexm_fi_add_pat ; +// defm: Storexm_fi_add_pat ; +// defm: Storexm_fi_add_pat ; + + defm: Storexm_add_pat; + defm: Storexm_add_pat; + defm: Storexm_add_pat; +} + +def: Storexm_simple_pat; +def: Storexm_simple_pat; +def: Storexm_simple_pat; + +// op(Ps, op(Pt, Pu)) +class LogLog_pat + : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))), + (MI I1:$Ps, I1:$Pt, I1:$Pu)>; + +// op(Ps, op(Pt, ~Pu)) +class LogLogNot_pat + : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))), + (MI I1:$Ps, I1:$Pt, I1:$Pu)>; + +def: LogLog_pat; +def: LogLog_pat; +def: LogLog_pat; +def: LogLog_pat; + +def: LogLogNot_pat; +def: LogLogNot_pat; +def: LogLogNot_pat; +def: LogLogNot_pat; + +//===----------------------------------------------------------------------===// +// PIC: Support for PIC compilations. The patterns and SD nodes defined +// below are needed to support code generation for PIC +//===----------------------------------------------------------------------===// + +def SDT_HexagonAtGot + : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_HexagonAtPcrel + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +// AT_GOT address-of-GOT, address-of-global, offset-in-global +def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; +// AT_PCREL address-of-global +def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; + +def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), + (L2_loadri_io I32:$got, imm:$addr)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), + (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; +def: Pat<(HexagonAtPcrel I32:$addr), + (C4_addipc imm:$addr)>; + +def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), + (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>; +def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))), + (A4_ornp DoubleRegs:$Rs, 
DoubleRegs:$Rt)>; + +def: Pat<(add (i32 IntRegs:$Rs), (add (i32 IntRegs:$Ru), s32_0ImmPred:$s6)), + (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>; + +// Rd=add(Rs,sub(#s6,Ru)) +def: Pat<(add (i32 IntRegs:$src1), (sub s32_0ImmPred:$src2, + (i32 IntRegs:$src3))), + (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; + +// Rd=sub(add(Rs,#s6),Ru) +def: Pat<(sub (add (i32 IntRegs:$src1), s32_0ImmPred:$src2), + (i32 IntRegs:$src3)), + (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; + +// Rd=add(sub(Rs,Ru),#s6) +def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)), + (s32_0ImmPred:$src2)), + (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; + +def: Pat<(xor (i64 DoubleRegs:$dst2), + (xor (i64 DoubleRegs:$Rss), (i64 DoubleRegs:$Rtt))), + (M4_xor_xacc DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt)>; +def: Pat<(or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32_0ImmPred:$s10)), + (S4_or_andix IntRegs:$Ru, IntRegs:$_src_, imm:$s10)>; + +def: Pat<(or (i32 IntRegs:$src1), (and (i32 IntRegs:$Rs), s32_0ImmPred:$s10)), + (S4_or_andi IntRegs:$src1, IntRegs:$Rs, imm:$s10)>; + +def: Pat<(or (i32 IntRegs:$src1), (or (i32 IntRegs:$Rs), s32_0ImmPred:$s10)), + (S4_or_ori IntRegs:$src1, IntRegs:$Rs, imm:$s10)>; + + + +// Count trailing zeros: 64-bit. +def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; + +// Count trailing ones: 64-bit. +def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; + +// Define leading/trailing patterns that require zero-extensions to 64 bits. +def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>; +def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>; +def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>; +def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>; + + +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. 
+ def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), (i32 IntRegs:$Rs)), 0)), + (S4_ntstbit_i (i32 IntRegs:$Rs), u5_0ImmPred:$u5)>; + def: Pat<(i1 (seteq (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)), + (S4_ntstbit_r (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))>; +} + +// Add extra complexity to prefer these instructions over bitsset/bitsclr. +// The reason is that tstbit/ntstbit can be folded into a compound instruction: +// if ([!]tstbit(...)) jump ... +let AddedComplexity = 100 in +def: Pat<(i1 (setne (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), + (S2_tstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; + +let AddedComplexity = 100 in +def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))), + (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>; + +// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be +// represented as a compare against "value & 0xFF", which is an exact match +// for cmpb (same for cmph). The patterns below do not contain any additional +// complexity that would make them preferable, and if they were actually used +// instead of cmpb/cmph, they would result in a compare against register that +// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF). 
+def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)), + (C4_nbitsclri I32:$Rs, u6_0ImmPred:$u6)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), + (C4_nbitsclr I32:$Rs, I32:$Rt)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), + (C4_nbitsset I32:$Rs, I32:$Rt)>; + + +def: Pat<(add (mul (i32 IntRegs:$Rs), u6_0ImmPred:$U6), u32_0ImmPred:$u6), + (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; +def: Pat<(add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32_0ImmPred:$u6), + (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; + +def: Pat<(add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), u6_2ImmPred:$src2)), + (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>; +def: Pat<(add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), u32_0ImmPred:$src2)), + (M4_mpyri_addr IntRegs:$src1, IntRegs:$src3, imm:$src2)>; + +def: Pat<(add (i32 IntRegs:$Ru), (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))), + (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs)>; + +def: T_vcmp_pat; + +class T_Shift_CommOp_pat + : Pat<(Op (ShOp IntRegs:$Rx, u5_0ImmPred:$U5), u32_0ImmPred:$u8), + (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>; + +let AddedComplexity = 200 in { + def : T_Shift_CommOp_pat ; + def : T_Shift_CommOp_pat ; + def : T_Shift_CommOp_pat ; + def : T_Shift_CommOp_pat ; +} + +let AddedComplexity = 30 in { + def : T_Shift_CommOp_pat ; + def : T_Shift_CommOp_pat ; +} + +class T_Shift_Op_pat + : Pat<(Op u32_0ImmPred:$u8, (ShOp IntRegs:$Rx, u5_0ImmPred:$U5)), + (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>; + +def : T_Shift_Op_pat ; +def : T_Shift_Op_pat ; + +let AddedComplexity = 200 in { + def: Pat<(add addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)), + (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; + def: Pat<(add addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)), + (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; + def: Pat<(sub addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)), + (S4_subi_asl_ri addrga:$addr, 
IntRegs:$src2, u5_0ImmPred:$src3)>; + def: Pat<(sub addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)), + (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; +} + +def: Pat<(shl s6_0ImmPred:$s6, (i32 IntRegs:$Rt)), + (S4_lsli imm:$s6, IntRegs:$Rt)>; + + +//===----------------------------------------------------------------------===// +// MEMOP +//===----------------------------------------------------------------------===// + +def m5_0Imm8Pred : PatLeaf<(i32 imm), [{ + int8_t v = (int8_t)N->getSExtValue(); + return v > -32 && v <= -1; +}]>; + +def m5_0Imm16Pred : PatLeaf<(i32 imm), [{ + int16_t v = (int16_t)N->getSExtValue(); + return v > -32 && v <= -1; +}]>; + +def Clr5Imm8Pred : PatLeaf<(i32 imm), [{ + uint32_t v = (uint8_t)~N->getZExtValue(); + return ImmIsSingleBit(v); +}]>; + +def Clr5Imm16Pred : PatLeaf<(i32 imm), [{ + uint32_t v = (uint16_t)~N->getZExtValue(); + return ImmIsSingleBit(v); +}]>; + +def Set5Imm8 : SDNodeXFormgetZExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + +def Set5Imm16 : SDNodeXFormgetZExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + +def Set5Imm32 : SDNodeXFormgetZExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + +def Clr5Imm8 : SDNodeXFormgetZExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + +def Clr5Imm16 : SDNodeXFormgetZExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + +def Clr5Imm32 : SDNodeXFormgetZExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + +def NegImm8 : SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32); +}]>; + +def NegImm16 : SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32); +}]>; + +def NegImm32 : SDNodeXFormgetTargetConstant(-N->getSExtValue(), SDLoc(N), MVT::i32); +}]>; + +def IdImm : SDNodeXForm; + +multiclass Memopxr_simple_pat { + // Addr: i32 + def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs), + (MI I32:$Rs, 0, I32:$A)>; 
+ // Addr: fi + def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs), + (MI AddrFI:$Rs, 0, I32:$A)>; +} + +multiclass Memopxr_add_pat { + // Addr: i32 + def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A), + (add I32:$Rs, ImmPred:$Off)), + (MI I32:$Rs, imm:$Off, I32:$A)>; + def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), I32:$A), + (orisadd I32:$Rs, ImmPred:$Off)), + (MI I32:$Rs, imm:$Off, I32:$A)>; + // Addr: fi + def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A), + (add AddrFI:$Rs, ImmPred:$Off)), + (MI AddrFI:$Rs, imm:$Off, I32:$A)>; + def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), I32:$A), + (orisadd AddrFI:$Rs, ImmPred:$Off)), + (MI AddrFI:$Rs, imm:$Off, I32:$A)>; +} + +multiclass Memopxr_pat { + defm: Memopxr_simple_pat ; + defm: Memopxr_add_pat ; +} + +let AddedComplexity = 180 in { + // add reg + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + + // sub reg + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + + // and reg + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + + // or reg + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; +} + + +multiclass Memopxi_simple_pat { + // Addr: i32 + def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs), + (MI I32:$Rs, 0, (ArgMod Arg:$A))>; + // Addr: fi + def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs), + (MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>; +} + +multiclass Memopxi_add_pat { + // Addr: i32 + def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$A), + (add I32:$Rs, ImmPred:$Off)), + (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>; + def: 
Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), Arg:$A), + (orisadd I32:$Rs, ImmPred:$Off)), + (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>; + // Addr: fi + def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$A), + (add AddrFI:$Rs, ImmPred:$Off)), + (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>; + def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), Arg:$A), + (orisadd AddrFI:$Rs, ImmPred:$Off)), + (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>; +} + +multiclass Memopxi_pat { + defm: Memopxi_simple_pat ; + defm: Memopxi_add_pat ; +} + + +let AddedComplexity = 200 in { + // add imm + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + + // sub imm + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + + // clrbit imm + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + + // setbit imm + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; +} + +def : T_CMP_pat ; +def : T_CMP_pat ; +def : T_CMP_pat ; + +// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1). +def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32_0ImmPred:$src2)), + (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32_0ImmPred:$src2))>; + +// rs != rt -> !(rs == rt). +def: Pat<(i1 (setne (i32 IntRegs:$src1), s32_0ImmPred:$src2)), + (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>; + +// SDNode for converting immediate C to C-1. 
+def DEC_CONST_BYTE : SDNodeXFormgetSExtValue(); + return XformU7ToU7M1Imm(imm, SDLoc(N)); +}]>; + +// For the sequence +// zext( setult ( and(Rs, 255), u8)) +// Use the isdigit transformation below + +// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)' +// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;. +// The isdigit transformation relies on two 'clever' aspects: +// 1) The data type is unsigned which allows us to eliminate a zero test after +// biasing the expression by 48. We are depending on the representation of +// the unsigned types, and semantics. +// 2) The front end has converted <= 9 into < 10 on entry to LLVM +// +// For the C code: +// retval = ((c>='0') & (c<='9')) ? 1 : 0; +// The code is transformed upstream of llvm into +// retval = (c-48) < 10 ? 1 : 0; +let AddedComplexity = 139 in +def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)), + u7_0StrictPosImmPred:$src2)))), + (C2_muxii (A4_cmpbgtui IntRegs:$src1, + (DEC_CONST_BYTE u7_0StrictPosImmPred:$src2)), + 0, 1)>; + +class Loada_pat + : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; + +class Loadam_pat + : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>; + +class Storea_pat + : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; + +class Stoream_pat + : Pat<(Store Value:$val, Addr:$addr), + (MI Addr:$addr, (ValueMod Value:$val))>; + +let AddedComplexity = 30 in { + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; +} + +def: Storea_pat, I32, addrgp, S2_storerbgp>; +def: Storea_pat, I32, addrgp, S2_storerhgp>; +def: Storea_pat, I32, addrgp, S2_storerigp>; +def: Storea_pat, I64, addrgp, S2_storerdgp>; + +let AddedComplexity = 100 in { + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + + // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" + // to "r0 = 1; memw(#foo) = r0" + let AddedComplexity = 100 in + def: Pat<(store 
(i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), + (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>; +} + +class LoadAbs_pats + : Pat <(VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))), + (VT (MI tglobaladdr:$absaddr))>; + +let AddedComplexity = 30 in { + def: LoadAbs_pats ; + def: LoadAbs_pats ; + def: LoadAbs_pats ; + def: LoadAbs_pats ; + def: LoadAbs_pats ; + def: LoadAbs_pats ; + def: LoadAbs_pats ; + def: LoadAbs_pats ; + def: LoadAbs_pats ; +} + +let AddedComplexity = 30 in +def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))), + (Zext64 (PS_loadrubabs tglobaladdr:$absaddr))>; + +def: Loada_pat; +def: Loada_pat; +def: Loada_pat; +def: Loada_pat; + +// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd +def: Loadam_pat; +def: Loadam_pat; + +def: Stoream_pat; +def: Stoream_pat; + +// Map from load(globaladdress) -> mem[u][bhwd](#foo) +class LoadGP_pats + : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))), + (VT (MI tglobaladdr:$global))>; + +let AddedComplexity = 100 in { + def: LoadGP_pats ; + def: LoadGP_pats ; + def: LoadGP_pats ; + def: LoadGP_pats ; + def: LoadGP_pats ; + def: LoadGP_pats ; + def: LoadGP_pats ; + def: LoadGP_pats ; +} + +// When the Interprocedural Global Variable optimizer realizes that a certain +// global variable takes only two constant values, it shrinks the global to +// a boolean. Catch those loads here in the following patterns. 
+let AddedComplexity = 100 in { + def: LoadGP_pats ; + def: LoadGP_pats ; +} + +// Transfer global address into a register +def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>; +def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi imm:$Rs)>; +def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>; + +let AddedComplexity = 30 in { + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; +} + +let AddedComplexity = 30 in { + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; +} + +// Indexed store word - global address. +// memw(Rs+#u6:2)=#S8 +let AddedComplexity = 100 in +defm: Storex_add_pat; + +// Load from a global address that has only one use in the current basic block. +let AddedComplexity = 100 in { + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + + def: Loada_pat; + def: Loada_pat; +} + +// Store to a global address that has only one use in the current basic block. +let AddedComplexity = 100 in { + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + + def: Stoream_pat; +} + +// i8/i16/i32 -> i64 loads +// We need a complexity of 120 here to override preceding handling of +// zextload. 
+let AddedComplexity = 120 in { + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; +} + +let AddedComplexity = 100 in { + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + + def: Loada_pat; + def: Loada_pat; +} + +let AddedComplexity = 100 in { + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; +} + +def: Loada_pat; +def: Loada_pat; +def: Loada_pat; +def: Loada_pat; + +def: Storea_pat, I32, addrgp, PS_storerbabs>; +def: Storea_pat, I32, addrgp, PS_storerhabs>; +def: Storea_pat, I32, addrgp, PS_storeriabs>; +def: Storea_pat, I64, addrgp, PS_storerdabs>; + +def: Pat<(or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))), + (i32 16)), + (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))), + (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))), + (i32 32))), + (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))), + (Insert4 IntRegs:$a, IntRegs:$b, IntRegs:$c, IntRegs:$d)>; + +// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH +// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore. +// We don't really want either one here. 
+def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>; +def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, + [SDNPHasChain]>; + +def: Pat<(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3), + (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>; +def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)), + (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>; + +def f32ImmPred : PatLeaf<(f32 fpimm:$F)>; +def f64ImmPred : PatLeaf<(f64 fpimm:$F)>; + +def ftoi : SDNodeXFormgetValueAPF().bitcastToAPInt(); + return CurDAG->getTargetConstant(I.getZExtValue(), SDLoc(N), + MVT::getIntegerVT(I.getBitWidth())); +}]>; + + +def: Pat<(sra (i64 (add (i64 (sra I64:$src1, u6_0ImmPred:$src2)), 1)), (i32 1)), + (S2_asr_i_p_rnd I64:$src1, imm:$src2)>; + +def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i64>]>; + +def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>; + +def: Pat<(HexagonPOPCOUNT I64:$Rss), (S5_popcountp I64:$Rss)>; + +let AddedComplexity = 20 in { + defm: Loadx_pat; + defm: Loadx_pat; +} + +let AddedComplexity = 60 in { + defm : T_LoadAbsReg_Pat ; + defm : T_LoadAbsReg_Pat ; +} + +let AddedComplexity = 40 in { + def: Loadxs_pat; + def: Loadxs_pat; +} + +let AddedComplexity = 20 in { + def: Loadxs_simple_pat; + def: Loadxs_simple_pat; +} + +let AddedComplexity = 80 in { + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; +} + +let AddedComplexity = 100 in { + def: LoadGP_pats ; + def: LoadGP_pats ; +} + +let AddedComplexity = 20 in { + defm: Storex_pat; + defm: Storex_pat; +} + +// Simple patterns should be tried with the least priority. 
+def: Storex_simple_pat; +def: Storex_simple_pat; + +let AddedComplexity = 60 in { + defm : T_StoreAbsReg_Pats ; + defm : T_StoreAbsReg_Pats ; +} + +let AddedComplexity = 40 in { + def: Storexs_pat; + def: Storexs_pat; +} + +let AddedComplexity = 20 in { + def: Store_rr_pat; + def: Store_rr_pat; +} + +let AddedComplexity = 80 in { + def: Storea_pat; + def: Storea_pat; +} + +let AddedComplexity = 100 in { + def: Storea_pat; + def: Storea_pat; +} + +defm: Storex_pat; +defm: Storex_pat; +def: Storex_simple_pat; +def: Storex_simple_pat; + +def: Pat<(fadd F32:$src1, F32:$src2), + (F2_sfadd F32:$src1, F32:$src2)>; + +def: Pat<(fsub F32:$src1, F32:$src2), + (F2_sfsub F32:$src1, F32:$src2)>; + +def: Pat<(fmul F32:$src1, F32:$src2), + (F2_sfmpy F32:$src1, F32:$src2)>; + +let Predicates = [HasV5T] in { + def: Pat<(f32 (fminnum F32:$Rs, F32:$Rt)), (F2_sfmin F32:$Rs, F32:$Rt)>; + def: Pat<(f32 (fmaxnum F32:$Rs, F32:$Rt)), (F2_sfmax F32:$Rs, F32:$Rt)>; +} + +let AddedComplexity = 100, Predicates = [HasV5T] in { + class SfSel12 + : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rs, F32:$Rt), + (MI F32:$Rs, F32:$Rt)>; + class SfSel21 + : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rt, F32:$Rs), + (MI F32:$Rs, F32:$Rt)>; + + def: SfSel12; + def: SfSel12; + def: SfSel12; + def: SfSel12; + def: SfSel21; + def: SfSel21; + def: SfSel21; + def: SfSel21; +} + +class T_fcmp32_pat + : Pat<(i1 (OpNode F32:$src1, F32:$src2)), + (MI F32:$src1, F32:$src2)>; +class T_fcmp64_pat + : Pat<(i1 (OpNode F64:$src1, F64:$src2)), + (MI F64:$src1, F64:$src2)>; + +def: T_fcmp32_pat; +def: T_fcmp32_pat; +def: T_fcmp32_pat; +def: T_fcmp32_pat; + +def: T_fcmp64_pat; +def: T_fcmp64_pat; +def: T_fcmp64_pat; +def: T_fcmp64_pat; + +let Predicates = [HasV5T] in +multiclass T_fcmp_pats { + // IntRegs + def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), + (IntMI F32:$src1, F32:$src2)>; + // DoubleRegs + def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), + (DoubleMI F64:$src1, F64:$src2)>; +} + +defm : T_fcmp_pats ; +defm : 
T_fcmp_pats ; +defm : T_fcmp_pats ; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations. +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass unord_Pats { + // IntRegs + def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (IntMI F32:$src1, F32:$src2))>; + + // DoubleRegs + def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (DoubleMI F64:$src1, F64:$src2))>; +} + +defm : unord_Pats ; +defm : unord_Pats ; +defm : unord_Pats ; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for the following dags: +// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2)) +// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2) +// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2) +// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2)) +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass eq_ordgePats { + // IntRegs + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (C2_not (IntMI F32:$src1, F32:$src2))>; + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (IntMI F32:$src1, F32:$src2)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (IntMI F32:$src1, F32:$src2)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (C2_not (IntMI F32:$src1, F32:$src2))>; + + // DoubleRegs + def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src1, F64:$src2))>; + def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (DoubleMI F64:$src1, F64:$src2)>; + def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (DoubleMI F64:$src1, F64:$src2)>; + def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (C2_not (DoubleMI 
F64:$src1, F64:$src2))>; +} + +defm : eq_ordgePats; +defm : eq_ordgePats; +defm : eq_ordgePats; + +//===----------------------------------------------------------------------===// +// Multiclass to define 'Def Pats' for the following dags: +// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1)) +// seteq(setolt(op1, op2), 1) -> setogt(op2, op1) +// setne(setolt(op1, op2), 0) -> setogt(op2, op1) +// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1)) +//===----------------------------------------------------------------------===// +let Predicates = [HasV5T] in +multiclass eq_ordltPats { + // IntRegs + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (C2_not (IntMI F32:$src2, F32:$src1))>; + def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (IntMI F32:$src2, F32:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), + (IntMI F32:$src2, F32:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), + (C2_not (IntMI F32:$src2, F32:$src1))>; + + // DoubleRegs + def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (C2_not (DoubleMI F64:$src2, F64:$src1))>; + def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (DoubleMI F64:$src2, F64:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), + (DoubleMI F64:$src2, F64:$src1)>; + def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)), + (C2_not (DoubleMI F64:$src2, F64:$src1))>; +} + +defm : eq_ordltPats; +defm : eq_ordltPats; + + +// o. seto inverse of setuo.
http://llvm.org/docs/LangRef.html#i_fcmp +let Predicates = [HasV5T] in { + def: Pat<(i1 (seto F32:$src1, F32:$src2)), + (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>; + def: Pat<(i1 (seto F32:$src1, f32ImmPred:$src2)), + (C2_not (F2_sfcmpuo (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>; + def: Pat<(i1 (seto F64:$src1, F64:$src2)), + (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>; + def: Pat<(i1 (seto F64:$src1, f64ImmPred:$src2)), + (C2_not (F2_dfcmpuo (CONST64 (ftoi $src2)), F64:$src1))>; +} + +// Ordered lt. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setolt F32:$src1, F32:$src2)), + (F2_sfcmpgt F32:$src2, F32:$src1)>; + def: Pat<(i1 (setolt F32:$src1, f32ImmPred:$src2)), + (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; + def: Pat<(i1 (setolt F64:$src1, F64:$src2)), + (F2_dfcmpgt F64:$src2, F64:$src1)>; + def: Pat<(i1 (setolt F64:$src1, f64ImmPred:$src2)), + (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>; +} + +// Unordered lt. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setult F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (F2_sfcmpgt F32:$src2, F32:$src1))>; + def: Pat<(i1 (setult F32:$src1, f32ImmPred:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))), + (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>; + def: Pat<(i1 (setult F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (F2_dfcmpgt F64:$src2, F64:$src1))>; + def: Pat<(i1 (setult F64:$src1, f64ImmPred:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))), + (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1))>; +} + +// Ordered le. +let Predicates = [HasV5T] in { + // rs <= rt -> rt >= rs. + def: Pat<(i1 (setole F32:$src1, F32:$src2)), + (F2_sfcmpge F32:$src2, F32:$src1)>; + def: Pat<(i1 (setole F32:$src1, f32ImmPred:$src2)), + (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; + + // Rss <= Rtt -> Rtt >= Rss. 
+ def: Pat<(i1 (setole F64:$src1, F64:$src2)), + (F2_dfcmpge F64:$src2, F64:$src1)>; + def: Pat<(i1 (setole F64:$src1, f64ImmPred:$src2)), + (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>; +} + +// Unordered le. +let Predicates = [HasV5T] in { +// rs <= rt -> rt >= rs. + def: Pat<(i1 (setule F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (F2_sfcmpge F32:$src2, F32:$src1))>; + def: Pat<(i1 (setule F32:$src1, f32ImmPred:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))), + (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>; + def: Pat<(i1 (setule F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (F2_dfcmpge F64:$src2, F64:$src1))>; + def: Pat<(i1 (setule F64:$src1, f64ImmPred:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))), + (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1))>; +} + +// Ordered ne. +let Predicates = [HasV5T] in { + def: Pat<(i1 (setone F32:$src1, F32:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; + def: Pat<(i1 (setone F64:$src1, F64:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; + def: Pat<(i1 (setone F32:$src1, f32ImmPred:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>; + def: Pat<(i1 (setone F64:$src1, f64ImmPred:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>; +} + +// Unordered ne. 
+let Predicates = [HasV5T] in { + def: Pat<(i1 (setune F32:$src1, F32:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), + (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>; + def: Pat<(i1 (setune F64:$src1, F64:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), + (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>; + def: Pat<(i1 (setune F32:$src1, f32ImmPred:$src2)), + (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))), + (C2_not (F2_sfcmpeq F32:$src1, + (f32 (A2_tfrsi (ftoi $src2))))))>; + def: Pat<(i1 (setune F64:$src1, f64ImmPred:$src2)), + (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))), + (C2_not (F2_dfcmpeq F64:$src1, + (CONST64 (ftoi $src2)))))>; +} + +// Besides set[o|u][comparisons], we also need set[comparisons]. +let Predicates = [HasV5T] in { + // lt. + def: Pat<(i1 (setlt F32:$src1, F32:$src2)), + (F2_sfcmpgt F32:$src2, F32:$src1)>; + def: Pat<(i1 (setlt F32:$src1, f32ImmPred:$src2)), + (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; + def: Pat<(i1 (setlt F64:$src1, F64:$src2)), + (F2_dfcmpgt F64:$src2, F64:$src1)>; + def: Pat<(i1 (setlt F64:$src1, f64ImmPred:$src2)), + (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>; + + // le. + // rs <= rt -> rt >= rs. + def: Pat<(i1 (setle F32:$src1, F32:$src2)), + (F2_sfcmpge F32:$src2, F32:$src1)>; + def: Pat<(i1 (setle F32:$src1, f32ImmPred:$src2)), + (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; + + // Rss <= Rtt -> Rtt >= Rss. + def: Pat<(i1 (setle F64:$src1, F64:$src2)), + (F2_dfcmpge F64:$src2, F64:$src1)>; + def: Pat<(i1 (setle F64:$src1, f64ImmPred:$src2)), + (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>; + + // ne.
+ def: Pat<(i1 (setne F32:$src1, F32:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; + def: Pat<(i1 (setne F64:$src1, F64:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; + def: Pat<(i1 (setne F32:$src1, f32ImmPred:$src2)), + (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>; + def: Pat<(i1 (setne F64:$src1, f64ImmPred:$src2)), + (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>; +} + + +def: Pat<(f64 (fpextend F32:$Rs)), (F2_conv_sf2df F32:$Rs)>; +def: Pat<(f32 (fpround F64:$Rs)), (F2_conv_df2sf F64:$Rs)>; + +def: Pat<(f32 (sint_to_fp I32:$Rs)), (F2_conv_w2sf I32:$Rs)>; +def: Pat<(f32 (sint_to_fp I64:$Rs)), (F2_conv_d2sf I64:$Rs)>; +def: Pat<(f64 (sint_to_fp I32:$Rs)), (F2_conv_w2df I32:$Rs)>; +def: Pat<(f64 (sint_to_fp I64:$Rs)), (F2_conv_d2df I64:$Rs)>; + +def: Pat<(f32 (uint_to_fp I32:$Rs)), (F2_conv_uw2sf I32:$Rs)>; +def: Pat<(f32 (uint_to_fp I64:$Rs)), (F2_conv_ud2sf I64:$Rs)>; +def: Pat<(f64 (uint_to_fp I32:$Rs)), (F2_conv_uw2df I32:$Rs)>; +def: Pat<(f64 (uint_to_fp I64:$Rs)), (F2_conv_ud2df I64:$Rs)>; + +def: Pat<(i32 (fp_to_sint F32:$Rs)), (F2_conv_sf2w_chop F32:$Rs)>; +def: Pat<(i32 (fp_to_sint F64:$Rs)), (F2_conv_df2w_chop F64:$Rs)>; +def: Pat<(i64 (fp_to_sint F32:$Rs)), (F2_conv_sf2d_chop F32:$Rs)>; +def: Pat<(i64 (fp_to_sint F64:$Rs)), (F2_conv_df2d_chop F64:$Rs)>; + +def: Pat<(i32 (fp_to_uint F32:$Rs)), (F2_conv_sf2uw_chop F32:$Rs)>; +def: Pat<(i32 (fp_to_uint F64:$Rs)), (F2_conv_df2uw_chop F64:$Rs)>; +def: Pat<(i64 (fp_to_uint F32:$Rs)), (F2_conv_sf2ud_chop F32:$Rs)>; +def: Pat<(i64 (fp_to_uint F64:$Rs)), (F2_conv_df2ud_chop F64:$Rs)>; + +// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. 
+let Predicates = [HasV5T] in { + def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>; + def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>; + def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>; + def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>; +} + +def : Pat <(fma F32:$src2, F32:$src3, F32:$src1), + (F2_sffma F32:$src1, F32:$src2, F32:$src3)>; + +def : Pat <(fma (fneg F32:$src2), F32:$src3, F32:$src1), + (F2_sffms F32:$src1, F32:$src2, F32:$src3)>; + +def : Pat <(fma F32:$src2, (fneg F32:$src3), F32:$src1), + (F2_sffms F32:$src1, F32:$src2, F32:$src3)>; + +def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$imm), + (C2_muxir I1:$Pu, F32:$Rs, (ftoi $imm))>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$Pu, f32ImmPred:$imm, F32:$Rt), + (C2_muxri I1:$Pu, (ftoi $imm), F32:$Rt)>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$src1, F32:$src2, F32:$src3), + (C2_mux I1:$src1, F32:$src2, F32:$src3)>, + Requires<[HasV5T]>; + +// setult(a, b) is the exact complement of the ordered compare a >= b, so +// select on cmp.ge(a, b) with the two arms swapped (also correct for NaN). +def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4), + (C2_mux (F2_sfcmpge F32:$src1, F32:$src2), F32:$src4, F32:$src3)>, + Requires<[HasV5T]>; + +def: Pat<(select I1:$src1, F64:$src2, F64:$src3), + (C2_vmux I1:$src1, F64:$src2, F64:$src3)>, + Requires<[HasV5T]>; + +// Same lowering as the F32 case above, using the 64-bit compare and mux. +def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4), + (C2_vmux (F2_dfcmpge F64:$src1, F64:$src2), F64:$src4, F64:$src3)>, + Requires<[HasV5T]>; + +// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) +// => r0 = mux(p0, r1, #i) +def: Pat<(select (not I1:$src1), f32ImmPred:$src2, F32:$src3), + (C2_muxir I1:$src1, F32:$src3, (ftoi $src2))>, + Requires<[HasV5T]>; + +// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) +// => r0 = mux(p0, #i, r1) +def: Pat<(select (not I1:$src1), F32:$src2, f32ImmPred:$src3), + (C2_muxri I1:$src1, (ftoi $src3), F32:$src2)>, + Requires<[HasV5T]>; + +def: Pat<(i32 (fp_to_sint F64:$src1)), + (LoReg (F2_conv_df2d_chop F64:$src1))>, + Requires<[HasV5T]>; + +def : Pat <(fabs (f32 IntRegs:$src1)), + (S2_clrbit_i (f32
IntRegs:$src1), 31)>, + Requires<[HasV5T]>; + +def : Pat <(fneg (f32 IntRegs:$src1)), + (S2_togglebit_i (f32 IntRegs:$src1), 31)>, + Requires<[HasV5T]>; + + +def alignedload : PatFrag<(ops node:$addr), (load $addr), [{ + return isAlignedMemNode(dyn_cast(N)); +}]>; + +def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{ + return !isAlignedMemNode(dyn_cast(N)); +}]>; + +def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ + return isAlignedMemNode(dyn_cast(N)); +}]>; + +def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ + return !isAlignedMemNode(dyn_cast(N)); +}]>; + + +multiclass vS32b_ai_pats { + // Aligned stores + def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), + (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), + (V6_vS32Ub_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + + // 128B Aligned stores + def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), + (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), + (V6_vS32Ub_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + + // Fold Add R+OFF into vector store. + let AddedComplexity = 10 in { + def : Pat<(alignedstore (VTSgl VectorRegs:$src1), + (add IntRegs:$src2, s4_6ImmPred:$offset)), + (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset, + (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), + (add IntRegs:$src2, s4_6ImmPred:$offset)), + (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset, + (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + + // Fold Add R+OFF into vector store 128B. 
+ def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), + (add IntRegs:$src2, s4_7ImmPred:$offset)), + (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, + (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), + (add IntRegs:$src2, s4_7ImmPred:$offset)), + (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, + (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + } +} + +defm : vS32b_ai_pats ; +defm : vS32b_ai_pats ; +defm : vS32b_ai_pats ; +defm : vS32b_ai_pats ; + + +multiclass vL32b_ai_pats { + // Aligned loads + def : Pat < (VTSgl (alignedload IntRegs:$addr)), + (V6_vL32b_ai IntRegs:$addr, 0) >, + Requires<[UseHVXSgl]>; + def : Pat < (VTSgl (unalignedload IntRegs:$addr)), + (V6_vL32Ub_ai IntRegs:$addr, 0) >, + Requires<[UseHVXSgl]>; + + // 128B Load + def : Pat < (VTDbl (alignedload IntRegs:$addr)), + (V6_vL32b_ai_128B IntRegs:$addr, 0) >, + Requires<[UseHVXDbl]>; + def : Pat < (VTDbl (unalignedload IntRegs:$addr)), + (V6_vL32Ub_ai_128B IntRegs:$addr, 0) >, + Requires<[UseHVXDbl]>; + + // Fold Add R+OFF into vector load. 
+ let AddedComplexity = 10 in { + def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))), + (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, + Requires<[UseHVXDbl]>; + def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))), + (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, + Requires<[UseHVXDbl]>; + + def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))), + (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>, + Requires<[UseHVXSgl]>; + def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))), + (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>, + Requires<[UseHVXSgl]>; + } +} + +defm : vL32b_ai_pats ; +defm : vL32b_ai_pats ; +defm : vL32b_ai_pats ; +defm : vL32b_ai_pats ; + +multiclass STrivv_pats { + def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), + (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, + Requires<[UseHVXSgl]>; + def : Pat<(unalignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), + (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, + Requires<[UseHVXSgl]>; + + def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), + (PS_vstorerw_ai_128B IntRegs:$addr, 0, + (VTDbl VecDblRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + def : Pat<(unalignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), + (PS_vstorerwu_ai_128B IntRegs:$addr, 0, + (VTDbl VecDblRegs128B:$src1))>, + Requires<[UseHVXDbl]>; +} + +defm : STrivv_pats ; +defm : STrivv_pats ; +defm : STrivv_pats ; +defm : STrivv_pats ; + +multiclass LDrivv_pats { + def : Pat<(VTSgl (alignedload I32:$addr)), + (PS_vloadrw_ai I32:$addr, 0)>, + Requires<[UseHVXSgl]>; + def : Pat<(VTSgl (unalignedload I32:$addr)), + (PS_vloadrwu_ai I32:$addr, 0)>, + Requires<[UseHVXSgl]>; + + def : Pat<(VTDbl (alignedload I32:$addr)), + (PS_vloadrw_ai_128B I32:$addr, 0)>, + Requires<[UseHVXDbl]>; + def : Pat<(VTDbl (unalignedload I32:$addr)), + (PS_vloadrwu_ai_128B I32:$addr, 0)>, + 
Requires<[UseHVXDbl]>; +} + +defm : LDrivv_pats ; +defm : LDrivv_pats ; +defm : LDrivv_pats ; +defm : LDrivv_pats ; + +let Predicates = [HasV60T,UseHVXSgl] in { + def: Pat<(select I1:$Pu, (v16i32 VectorRegs:$Vs), VectorRegs:$Vt), + (PS_vselect I1:$Pu, VectorRegs:$Vs, VectorRegs:$Vt)>; + def: Pat<(select I1:$Pu, (v32i32 VecDblRegs:$Vs), VecDblRegs:$Vt), + (PS_wselect I1:$Pu, VecDblRegs:$Vs, VecDblRegs:$Vt)>; +} +let Predicates = [HasV60T,UseHVXDbl] in { + def: Pat<(select I1:$Pu, (v32i32 VectorRegs128B:$Vs), VectorRegs128B:$Vt), + (PS_vselect_128B I1:$Pu, VectorRegs128B:$Vs, VectorRegs128B:$Vt)>; + def: Pat<(select I1:$Pu, (v64i32 VecDblRegs128B:$Vs), VecDblRegs128B:$Vt), + (PS_wselect_128B I1:$Pu, VecDblRegs128B:$Vs, VecDblRegs128B:$Vt)>; +} + + +def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, + SDTCisSubVecOfVec<1, 0>]>; + +def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; + +def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs), + (v16i32 VectorRegs:$Vt))), + (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), + (v32i32 VecDblRegs:$Vt))), + (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; + +def SDTHexagonVPACK: SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>, + SDTCisInt<3>]>; + +def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>; + +// 0 as the last argument denotes vpacke. 
1 denotes vpacko +def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), + (v64i8 VectorRegs:$Vt), (i32 0))), + (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), + (v64i8 VectorRegs:$Vt), (i32 1))), + (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), + (v32i16 VectorRegs:$Vt), (i32 0))), + (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), + (v32i16 VectorRegs:$Vt), (i32 1))), + (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; + +def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), + (v128i8 VecDblRegs:$Vt), (i32 0))), + (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; +def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), + (v128i8 VecDblRegs:$Vt), (i32 1))), + (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; +def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), + (v64i16 VecDblRegs:$Vt), (i32 0))), + (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; +def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), + (v64i16 VecDblRegs:$Vt), (i32 1))), + (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; + +def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; +def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; +def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; +def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; +def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; +def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; +def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; +def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; + + +multiclass bitconvert_32 { + def : Pat <(b (bitconvert (a IntRegs:$src))), + (b IntRegs:$src)>; + def : Pat <(a (bitconvert (b IntRegs:$src))), + (a IntRegs:$src)>; +} + +multiclass bitconvert_64 { + def : Pat <(b (bitconvert (a DoubleRegs:$src))), + (b DoubleRegs:$src)>; + def : Pat 
<(a (bitconvert (b DoubleRegs:$src))), + (a DoubleRegs:$src)>; +} + +// Bit convert vector types to integers. +defm : bitconvert_32; +defm : bitconvert_32; +defm : bitconvert_64; +defm : bitconvert_64; +defm : bitconvert_64; + +def: Pat<(sra (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2), + (S2_asr_i_vh DoubleRegs:$src1, imm:$src2)>; +def: Pat<(srl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2), + (S2_lsr_i_vh DoubleRegs:$src1, imm:$src2)>; +def: Pat<(shl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2), + (S2_asl_i_vh DoubleRegs:$src1, imm:$src2)>; + +def: Pat<(sra (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2), + (S2_asr_i_vw DoubleRegs:$src1, imm:$src2)>; +def: Pat<(srl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2), + (S2_lsr_i_vw DoubleRegs:$src1, imm:$src2)>; +def: Pat<(shl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2), + (S2_asl_i_vw DoubleRegs:$src1, imm:$src2)>; + +def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), + (A2_svaddh IntRegs:$src1, IntRegs:$src2)>; + +def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), + (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; + +def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>; +def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>; + +// Replicate the low 8-bits from 32-bits input register into each of the +// four bytes of 32-bits destination register. +def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>; + +// Replicate the low 16-bits from 32-bits input register into each of the +// four halfwords of 64-bits destination register. 
+// A v4i16 HexagonVSPLATH of a 32-bit register selects to S2_vsplatrh.
+def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
+
+
+// Binary vector operation: (Op Type:$Rss, Type:$Rtt) selects to instruction
+// MI applied to the same two operands.
+// NOTE(review): the template parameter list of this class and the arguments
+// of every instantiation below are missing in this copy of the patch (text
+// between angle brackets appears to have been stripped). Restore them from
+// the original commit before using this text.
+class VArith_pat
+  : Pat <(Op Type:$Rss, Type:$Rtt),
+         (MI Type:$Rss, Type:$Rtt)>;
+
+def: VArith_pat ;
+def: VArith_pat ;
+def: VArith_pat ;
+def: VArith_pat ;
+def: VArith_pat ;
+def: VArith_pat ;
+
+def: VArith_pat ;
+def: VArith_pat ;
+def: VArith_pat ;
+
+def: VArith_pat ;
+def: VArith_pat ;
+def: VArith_pat ;
+def: VArith_pat ;
+def: VArith_pat ;
+def: VArith_pat ;
+def: VArith_pat ;
+def: VArith_pat ;
+def: VArith_pat ;
+
+// v2i32 shifts whose shift amount is a HexagonCOMBINE of the same u5
+// immediate in both halves select to the shift-by-immediate instructions.
+def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
+                                                    (i32 u5_0ImmPred:$c))))),
+         (S2_asr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
+                                                    (i32 u5_0ImmPred:$c))))),
+         (S2_lsr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
+                                                    (i32 u5_0ImmPred:$c))))),
+         (S2_asl_i_vw V2I32:$b, imm:$c)>;
+
+// v4i16 shifts whose shift amount is a HexagonVSPLATH of a u4 immediate
+// select to the halfword shift-by-immediate instructions.
+def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
+         (S2_asr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
+         (S2_lsr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
+         (S2_asl_i_vh V4I16:$b, imm:$c)>;
+
+
+// Type profiles for the vector shift nodes: one result and two operands,
+// where the result and the first operand have the same vector type (v2i32
+// or v4i16) and the second operand is an integer.
+def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
+  [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
+def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
+  [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
+
+// Hexagon-specific vector shift nodes, word (W) and halfword (H) variants.
+def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
+
+// Vector shift nodes with an immediate shift amount.
+def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5_0ImmPred:$u5)),
+         (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)),
+         (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)),
+         (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)),
+         (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)),
+         (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)),
+         (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
+
+// Vector shift with the shift amount in a 32-bit register:
+// (Op Value:$Rs, I32:$Rt) selects to (MI Value:$Rs, I32:$Rt).
+// NOTE(review): the template parameter list and the arguments of the six
+// instantiations below are missing in this copy of the patch.
+class vshift_rr_pat
+  : Pat <(Op Value:$Rs, I32:$Rt),
+         (MI Value:$Rs, I32:$Rt)>;
+
+def: vshift_rr_pat ;
+def: vshift_rr_pat ;
+def: vshift_rr_pat ;
+def: vshift_rr_pat ;
+def: vshift_rr_pat ;
+def: vshift_rr_pat ;
+
+
+// Type profiles for the vector compare nodes: an i1 result and two operands
+// of the same vector type (v8i8, v4i16 or v2i32).
+def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
+  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
+def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
+  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
+def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
+  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
+
+// Hexagon-specific vector compare nodes: byte (B), halfword (H) and word (W)
+// variants of EQ, GT and GTU.
+def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
+
+
+// Vector compare producing an i1: (i1 (Op Value:$Rs, Value:$Rt)) selects to
+// (MI Value:$Rs, Value:$Rt).
+// NOTE(review): the template parameter list and the arguments of the nine
+// instantiations below are missing in this copy of the patch.
+class vcmp_i1_pat
+  : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
+         (MI Value:$Rs, Value:$Rt)>;
+
+def: vcmp_i1_pat;
+def: vcmp_i1_pat;
+def: vcmp_i1_pat;
+
+def: vcmp_i1_pat;
+def: vcmp_i1_pat;
+def: vcmp_i1_pat;
+
+def: vcmp_i1_pat;
+def: vcmp_i1_pat;
+def: vcmp_i1_pat;
+
+
+// Vector compare producing the result type OutTy:
+// (OutTy (Op InVal:$Rs, InVal:$Rt)) selects to (MI InVal:$Rs, InVal:$Rt).
+// NOTE(review): the template parameter list and the arguments of the six
+// instantiations below are missing in this copy of the patch.
+class vcmp_vi1_pat
+  : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
+         (MI InVal:$Rs, InVal:$Rt)>;
+
+def: vcmp_vi1_pat;
+def: vcmp_vi1_pat;
+def: vcmp_vi1_pat;
+
+def: vcmp_vi1_pat;
+def: vcmp_vi1_pat;
+def: vcmp_vi1_pat;
+
+// v2i32 multiply and multiply-accumulate select to PS_vmulw and
+// PS_vmulw_acc.
+def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
+         (PS_vmulw DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)),
+         (PS_vmulw_acc DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+
+// Adds two v4i8: Hexagon does not have an insn for this one, so we
+// use the double add v8i8, and use only the low part of the result.
+def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+         (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+// Subtract two v4i8: Hexagon does not have an insn for this one, so we
+// use the double sub v8i8, and use only the low part of the result.
+def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+         (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// No 32 bit vector mux.
+//
+// Both operands are widened with Zext64, C2_vmux is applied, and only the
+// low register of the result is used.
+def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
+         (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
+         (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// 64-bit vector mux.
+//
+def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
+         (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
+def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
+         (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
+def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
+         (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
+
+//
+// No 32 bit vector compare.
+//
+// 32-bit vector compares: both operands are widened with Zext64 and the
+// 64-bit byte/halfword compare instruction is used.
+def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
+         (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
+         (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
+         (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
+         (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
+         (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
+         (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+
+// Compare selected with swapped operands: (CmpTy (CmpOp Value:$Rs, Value:$Rt))
+// selects to (InvMI Value:$Rt, Value:$Rs).
+// NOTE(review): the template parameter list of this class and the arguments
+// of the twelve instantiations below are missing in this copy of the patch
+// (text between angle brackets appears to have been stripped). Restore them
+// from the original commit before using this text.
+class InvertCmp_pat
+  : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
+        (InvMI Value:$Rt, Value:$Rs)>;
+
+// Map from a compare operation to the corresponding instruction with the
+// order of operands reversed, e.g. x > y --> cmp.lt(y,x).
+def: InvertCmp_pat;
+def: InvertCmp_pat;
+def: InvertCmp_pat;
+def: InvertCmp_pat;
+def: InvertCmp_pat;
+def: InvertCmp_pat;
+
+def: InvertCmp_pat;
+def: InvertCmp_pat;
+def: InvertCmp_pat;
+def: InvertCmp_pat;
+def: InvertCmp_pat;
+def: InvertCmp_pat;
+
+// Map from vcmpne(Rss) -> !vcmpew(Rss).
+// rs != rt -> !(rs == rt).
+// v2i32 inequality is the negation of the word equality compare. The word
+// compare (A2_vcmpweq) is required here: the byte compare (A2_vcmpbeq)
+// tests each byte separately, so two words that differ in only some of
+// their bytes would not yield a uniform result for that v2i1 lane.
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
+         (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;
+
+
+// Truncate: from vector B copy all 'E'ven 'B'yte elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
+def: Pat<(v4i8 (trunc V4I16:$Rs)),
+         (S2_vtrunehb V4I16:$Rs)>;
+
+// Truncate: from vector B copy all 'O'dd 'B'yte elements:
+// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
+// S2_vtrunohb
+
+// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
+// S2_vtruneh
+
+// v2i32 -> v2i16 truncate: S2_packhl of the high and low words, of which
+// only the low register of the result is used.
+def: Pat<(v2i16 (trunc V2I32:$Rs)),
+         (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
+
+
+// Hexagon-specific vector sign-extension nodes.
+def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
+def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
+
+// NOTE(review): VSXTBW is selected to S2_vsxthw (the instruction also used
+// for v2i16 -> v2i32 sext below), not to a byte-to-word extend. Left as
+// is; confirm against the lowering code that creates VSXTBW.
+def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
+def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
+
+// Vector zero/any/sign extension from a 32-bit vector to a 64-bit vector.
+// anyext uses the same instructions as zext.
+def: Pat<(v4i16 (zext   V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (zext   V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (anyext V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (sext   V4I8:$Rs)),  (S2_vsxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (sext   V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
+
+// Sign extends a v2i8 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
+         (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
+
+// Sign extends a v2i16 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
+         (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
+
+
+// Multiplies two v2i16 and returns a v2i32. We are using here the
+// saturating multiply, as hexagon does not provide a non saturating
+// vector multiply, and saturation does not impact the result that is
+// in double precision of the operands.
+
+// v2i16 multiply. Hexagon has no multiply with C semantics for this type,
+// so the halfword multiply vmpyh is used: it takes two v2i16 values and
+// produces a v2i32. The product is then truncated back to v2i16, which
+// gives the wrap-around behavior of unsigned arithmetic in C.
+def vmpyh: OutPatFrag<(ops node:$Lhs, node:$Rhs),
+                      (M2_vmpy2s_s0 (i32 $Lhs), (i32 $Rhs))>;
+
+def: Pat<(v2i16 (mul V2I16:$Lhs, V2I16:$Rhs)),
+         (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
+                             (v2i32 (vmpyh V2I16:$Lhs, V2I16:$Rhs))))>;
+
+// v4i16 multiply: the high and the low halves are multiplied separately
+// with vmpyh and the two products are combined by S2_vtrunewh.
+def: Pat<(v4i16 (mul V4I16:$Lhs, V4I16:$Rhs)),
+         (S2_vtrunewh (vmpyh (HiReg $Lhs), (HiReg $Rhs)),
+                      (vmpyh (LoReg $Lhs), (LoReg $Rhs)))>;
+
+// Byte multiply built without M5_vmpybsu: each operand is widened with
+// S2_vsxtbh, the high and the low halves are multiplied with vmpyh, and the
+// two products are combined by S2_vtrunewh.
+def VMPYB_no_V5: OutPatFrag<(ops node:$Lhs, node:$Rhs),
+  (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Lhs)), (HiReg (S2_vsxtbh $Rhs))),
+               (vmpyh (LoReg (S2_vsxtbh $Lhs)), (LoReg (S2_vsxtbh $Rhs))))>;
+
+// v4i8 multiply, variant guarded by HasV5T (uses M5_vmpybsu).
+def: Pat<(v4i8 (mul V4I8:$Lhs, V4I8:$Rhs)),
+         (S2_vtrunehb (M5_vmpybsu V4I8:$Lhs, V4I8:$Rhs))>,
+     Requires<[HasV5T]>;
+
+// v4i8 multiply, variant without a predicate (uses VMPYB_no_V5).
+def: Pat<(v4i8 (mul V4I8:$Lhs, V4I8:$Rhs)),
+         (S2_vtrunehb (VMPYB_no_V5 V4I8:$Lhs, V4I8:$Rhs))>;
+
+// Multiplies two v8i8 vectors.
+// Variant guarded by HasV5T: each 32-bit half is multiplied with
+// M5_vmpybsu, truncated with S2_vtrunehb, and the halves are recombined.
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+         (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
+                      (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
+     Requires<[HasV5T]>;
+
+// Variant without a predicate: same shape, using VMPYB_no_V5.
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+         (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
+                      (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
+
+// Type profile for the shuffle nodes: one result and two operands, all i64.
+def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
+  [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
+
+def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
+def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
+def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
+def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
+
+// Selects the shuffle node Op on two 64-bit registers to instruction MI.
+// NOTE(review): the template parameter list and the arguments of the four
+// instantiations were missing in this copy of the patch and have been
+// reconstructed from the body of the class, the comments on each
+// instantiation and the SDNodes defined above; verify against the original
+// commit.
+class ShufflePat<InstHexagon MI, SDNode Op>
+  : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
+        (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
+def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
+
+// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
+def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
+
+// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
+def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
+
+// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
+def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
+
+
+// Truncated store from v4i16 to v4i8.
+// The predicate needs the explicit cast target: N matches a truncstore, so
+// it is cast to StoreSDNode to read the memory type.
+def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr),
+  [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
+
+// Truncated store from v2i32 to v2i16.
+def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
+                             (truncstore node:$val, node:$ptr),
+  [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
+
+// The value is narrowed to 32 bits first (S2_packhl + LoReg, or
+// S2_vtrunehb) and then stored as a word at offset 0.
+def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
+         (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
+                                                      (LoReg $Rs))))>;
+
+def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
+         (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;
+
+
+// Zero and sign extended load from v2i8 into v2i16.
+// Zero-extending load whose memory type is v2i8.
+// The predicate needs the explicit cast target: N matches a zextload, so it
+// is cast to LoadSDNode to read the memory type.
+// NOTE(review): the cast's type argument was missing in this copy of the
+// patch; verify against the original commit.
+def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
+  [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+// Sign-extending load whose memory type is v2i8.
+def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
+  [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+// v2i8 -> v2i16: load a halfword from offset 0 (L2_loadruh_io for zext,
+// L2_loadrh_io for sext), extend its bytes to halfwords, and use the low
+// register of the result.
+def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
+         (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
+         (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;
+
+// v2i8 -> v2i32: as above, followed by a halfword-to-word extension.
+def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
+         (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;
+
+def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
+         (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
+
-- 
2.7.4