From 28bb02a8c7ac87c9de1b66938f268d66cdd1e67a Mon Sep 17 00:00:00 2001 From: Colin LeMahieu Date: Mon, 5 Jan 2015 20:56:41 +0000 Subject: [PATCH] [Hexagon] Adding rounding reg/reg variants, accumulating multiplies, and accumulating shifts. llvm-svn: 225201 --- llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td | 227 ++++++++++++++++------ llvm/test/MC/Disassembler/Hexagon/xtype_alu.txt | 6 + llvm/test/MC/Disassembler/Hexagon/xtype_mpy.txt | 10 + llvm/test/MC/Disassembler/Hexagon/xtype_shift.txt | 16 ++ 4 files changed, 202 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td index 7b7fea0..ae7b7d4 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -2020,74 +2020,127 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), // XTYPE/MPY + //===----------------------------------------------------------------------===// -// Multiply and user lower result. -// Rd=add(#u6,mpyi(Rs,#U6)) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6, -validSubTargets = HasV4SubT in -def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst), - (ins u6Ext:$src1, IntRegs:$src2, u6Imm:$src3), - "$dst = add(#$src1, mpyi($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (add (mul (i32 IntRegs:$src2), u6ImmPred:$src3), - u6ExtPred:$src1))]>, - Requires<[HasV4T]>; +// Rd=add(#u6,mpyi(Rs,#U6)) -- Multiply by immed and add immed. + +let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1, + isCodeGenOnly = 0 in +def M4_mpyri_addi : MInst<(outs IntRegs:$Rd), + (ins u6Ext:$u6, IntRegs:$Rs, u6Imm:$U6), + "$Rd = add(#$u6, mpyi($Rs, #$U6))" , + [(set (i32 IntRegs:$Rd), + (add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6), + u6ExtPred:$u6))] ,"",ALU64_tc_3x_SLOT23> { + bits<5> Rd; + bits<6> u6; + bits<5> Rs; + bits<6> U6; + + let IClass = 0b1101; + + let Inst{27-24} = 0b1000; + let Inst{23} = U6{5}; + let Inst{22-21} = u6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = u6{3}; + let Inst{12-8} = Rd; + let Inst{7-5} = u6{2-0}; + let Inst{4-0} = U6{4-0}; + } + +// Rd=add(#u6,mpyi(Rs,Rt)) +let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1, + isExtendable = 1, opExtentBits = 6, opExtendable = 1, isCodeGenOnly = 0 in +def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd), + (ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt), + "$Rd = add(#$u6, mpyi($Rs, $Rt))" , + [(set (i32 IntRegs:$Rd), + (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u6ExtPred:$u6))], + "", ALU64_tc_3x_SLOT23>, ImmRegRel { + bits<5> Rd; + bits<6> u6; + bits<5> Rs; + bits<5> Rt; + + let IClass = 0b1101; + + let Inst{27-23} = 0b01110; + let Inst{22-21} = u6{5-4}; + let Inst{20-16} = Rs; + let Inst{13} = u6{3}; + let Inst{12-8} = Rt; + let Inst{7-5} = u6{2-0}; + let Inst{4-0} = Rd; + } + +let hasNewValue = 1 in +class T_AddMpy + : ALU64Inst <(outs IntRegs:$dst), ins, + "$dst = add($src1, mpyi("#!if(MajOp,"$src3, #$src2))", + "#$src2, $src3))"), + [(set (i32 IntRegs:$dst), + (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), ImmPred:$src2)))], + "", ALU64_tc_3x_SLOT23> { + bits<5> dst; + bits<5> src1; + bits<8> src2; + bits<5> src3; + + let IClass = 0b1101; + + bits<6> ImmValue = !if(MajOp, src2{5-0}, src2{7-2}); + + let Inst{27-24} = 0b1111; + let Inst{23} = MajOp; + let Inst{22-21} = ImmValue{5-4}; + let Inst{20-16} = src3; + let Inst{13} = ImmValue{3}; + let Inst{12-8} = dst; + let Inst{7-5} = ImmValue{2-0}; + let Inst{4-0} = src1; + } + +let isCodeGenOnly = 0 in +def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred, + (ins IntRegs:$src1, u6_2Imm:$src2, IntRegs:$src3)>; + +let isExtendable = 1, opExtentBits = 6, opExtendable = 3, + CextOpcode = "ADD_MPY", InputType = "imm", isCodeGenOnly = 0 in +def M4_mpyri_addr : T_AddMpy<0b1, u6ExtPred, + (ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel; + +// Rx=add(Ru,mpyi(Rx,Rs)) +let validSubTargets = HasV4SubT, CextOpcode = "ADD_MPY", InputType = "reg", + hasNewValue = 1, isCodeGenOnly = 0 in +def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx), + (ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs), + "$Rx = add($Ru, mpyi($_src_, $Rs))", + [(set (i32 IntRegs:$Rx), (add (i32 IntRegs:$Ru), + (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))))], + "$_src_ = $Rx", M_tc_3x_SLOT23>, ImmRegRel { + bits<5> Rx; + bits<5> Ru; + bits<5> Rs; + + let IClass = 0b1110; + + let Inst{27-21} = 0b0011000; + let Inst{12-8} = Rx; + let Inst{4-0} = Ru; + let Inst{20-16} = Rs; + } // Rd=add(##,mpyi(Rs,#U6)) def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3), (HexagonCONST32 tglobaladdr:$src1)), - (i32 (ADDi_MPYri_V4 tglobaladdr:$src1, IntRegs:$src2, + (i32 (M4_mpyri_addi tglobaladdr:$src1, IntRegs:$src2, u6ImmPred:$src3))>; -// Rd=add(#u6,mpyi(Rs,Rt)) -let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6, -validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in -def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst), - (ins u6Ext:$src1, IntRegs:$src2, IntRegs:$src3), - "$dst = add(#$src1, mpyi($src2, $src3))", - [(set (i32 IntRegs:$dst), - (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), - u6ExtPred:$src1))]>, - Requires<[HasV4T]>, ImmRegRel; - // Rd=add(##,mpyi(Rs,Rt)) def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), (HexagonCONST32 tglobaladdr:$src1)), - (i32 (ADDi_MPYrr_V4 tglobaladdr:$src1, IntRegs:$src2, + (i32 (M4_mpyrr_addi tglobaladdr:$src1, IntRegs:$src2, IntRegs:$src3))>; -// Rd=add(Ru,mpyi(#u6:2,Rs)) -let validSubTargets = HasV4SubT in -def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3), - "$dst = add($src1, mpyi(#$src2, $src3))", - [(set (i32 IntRegs:$dst), - (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), - u6_2ImmPred:$src2)))]>, - Requires<[HasV4T]>; - -// Rd=add(Ru,mpyi(Rs,#u6)) -let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 6, -validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in -def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, u6Ext:$src3), - "$dst = add($src1, mpyi($src2, #$src3))", - [(set (i32 IntRegs:$dst), - (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), - u6ExtPred:$src3)))]>, - Requires<[HasV4T]>, ImmRegRel; - -// Rx=add(Ru,mpyi(Rx,Rs)) -let validSubTargets = HasV4SubT, InputType = "reg", CextOpcode = "ADD_MPY" in -def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "$dst = add($src1, mpyi($src2, $src3))", - [(set (i32 IntRegs:$dst), - (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), - (i32 IntRegs:$src3))))], - "$src2 = $dst">, - Requires<[HasV4T]>, ImmRegRel; - - // Polynomial multiply words // Rdd=pmpyw(Rs,Rt) // Rxx^=pmpyw(Rs,Rt) @@ -2125,6 +2178,66 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), //===----------------------------------------------------------------------===// // XTYPE/SHIFT + //===----------------------------------------------------------------------===// +// Shift by immediate and accumulate/logical. +// Rx=add(#u8,asl(Rx,#U5)) Rx=add(#u8,lsr(Rx,#U5)) +// Rx=sub(#u8,asl(Rx,#U5)) Rx=sub(#u8,lsr(Rx,#U5)) +// Rx=and(#u8,asl(Rx,#U5)) Rx=and(#u8,lsr(Rx,#U5)) +// Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5)) +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8, + hasNewValue = 1, opNewValue = 0, validSubTargets = HasV4SubT in +class T_S4_ShiftOperate MajOp, InstrItinClass Itin> + : MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5), + "$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))", + [(set (i32 IntRegs:$Rd), + (Op (Sh I32:$Rx, u5ImmPred:$U5), u8ExtPred:$u8))], + "$Rd = $Rx", Itin> { + + bits<5> Rd; + bits<8> u8; + bits<5> Rx; + bits<5> U5; + + let IClass = 0b1101; + let Inst{27-24} = 0b1110; + let Inst{23-21} = u8{7-5}; + let Inst{20-16} = Rd; + let Inst{13} = u8{4}; + let Inst{12-8} = U5; + let Inst{7-5} = u8{3-1}; + let Inst{4} = asl_lsr; + let Inst{3} = u8{0}; + let Inst{2-1} = MajOp; +} + +multiclass T_ShiftOperate MajOp, + InstrItinClass Itin> { + def _asl_ri : T_S4_ShiftOperate; + def _lsr_ri : T_S4_ShiftOperate; +} + +let AddedComplexity = 200, isCodeGenOnly = 0 in { + defm S4_addi : T_ShiftOperate<"add", add, 0b10, ALU64_tc_2_SLOT23>; + defm S4_andi : T_ShiftOperate<"and", and, 0b00, ALU64_tc_2_SLOT23>; +} + +let AddedComplexity = 30, isCodeGenOnly = 0 in +defm S4_ori : T_ShiftOperate<"or", or, 0b01, ALU64_tc_1_SLOT23>; + +let isCodeGenOnly = 0 in +defm S4_subi : T_ShiftOperate<"sub", sub, 0b11, ALU64_tc_1_SLOT23>; + + +// Rd=[cround|round](Rs,Rt) +let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23, isCodeGenOnly = 0 in { + def A4_cround_rr : T_S3op_3 < "cround", IntRegs, 0b11, 0b00>; + def A4_round_rr : T_S3op_3 < "round", IntRegs, 0b11, 0b10>; +} + +// Rd=round(Rs,Rt):sat +let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23, + isCodeGenOnly = 0 in +def A4_round_rr_sat : T_S3op_3 < "round", IntRegs, 0b11, 0b11, 1>; // Shift by immediate and accumulate. // Rx=add(#u8,asl(Rx,#U5)) diff --git a/llvm/test/MC/Disassembler/Hexagon/xtype_alu.txt b/llvm/test/MC/Disassembler/Hexagon/xtype_alu.txt index 7d25fdb..b86165e 100644 --- a/llvm/test/MC/Disassembler/Hexagon/xtype_alu.txt +++ b/llvm/test/MC/Disassembler/Hexagon/xtype_alu.txt @@ -122,6 +122,12 @@ # CHECK: r17 = round(r21, #31) 0xd1 0xdf 0xf5 0x8c # CHECK: r17 = round(r21, #31):sat +0x11 0xdf 0xd5 0xc6 +# CHECK: r17 = cround(r21, r31) +0x91 0xdf 0xd5 0xc6 +# CHECK: r17 = round(r21, r31) +0xd1 0xdf 0xd5 0xc6 +# CHECK: r17 = round(r21, r31):sat 0x71 0xd5 0x1f 0xef # CHECK: r17 += sub(r21, r31) 0x11 0xd5 0x3f 0xd5 diff --git a/llvm/test/MC/Disassembler/Hexagon/xtype_mpy.txt b/llvm/test/MC/Disassembler/Hexagon/xtype_mpy.txt index f69e409..b6ccaa6 100644 --- a/llvm/test/MC/Disassembler/Hexagon/xtype_mpy.txt +++ b/llvm/test/MC/Disassembler/Hexagon/xtype_mpy.txt @@ -1,5 +1,15 @@ # RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s +0xb1 0xdf 0x35 0xd7 +# CHECK: r17 = add(#21, mpyi(r21, r31)) +0xbf 0xd1 0x35 0xd8 +# CHECK: r17 = add(#21, mpyi(r21, #31)) +0xb5 0xd1 0x3f 0xdf +# CHECK: r17 = add(r21, mpyi(#84, r31)) +0xf5 0xf1 0xb5 0xdf +# CHECK: r17 = add(r21, mpyi(r21, #31)) +0x15 0xd1 0x1f 0xe3 +# CHECK: r17 = add(r21, mpyi(r17, r31)) 0xf1 0xc3 0x15 0xe0 # CHECK: r17 =+ mpyi(r21, #31) 0xf1 0xc3 0x95 0xe0 diff --git a/llvm/test/MC/Disassembler/Hexagon/xtype_shift.txt b/llvm/test/MC/Disassembler/Hexagon/xtype_shift.txt index f18407a..5e26c1b6 100644 --- a/llvm/test/MC/Disassembler/Hexagon/xtype_shift.txt +++ b/llvm/test/MC/Disassembler/Hexagon/xtype_shift.txt @@ -36,6 +36,14 @@ # CHECK: r17 += lsr(r21, #31) 0xd1 0xdf 0x15 0x8e # CHECK: r17 += asl(r21, #31) +0x4c 0xf7 0x11 0xde +# CHECK: r17 = add(#21, asl(r17, #23)) +0x4e 0xf7 0x11 0xde +# CHECK: r17 = sub(#21, asl(r17, #23)) +0x5c 0xf7 0x11 0xde +# CHECK: r17 = add(#21, lsr(r17, #23)) +0x5e 0xf7 0x11 0xde +# CHECK: r17 = sub(#21, lsr(r17, #23)) 0xf1 0xd5 0x1f 0xc4 # CHECK: r17 = addasl(r21, r31, #7) 0x10 0xdf 0x54 0x82 @@ -54,6 +62,14 @@ # CHECK: r17:16 ^= lsr(r21:20, #31) 0x50 0xdf 0x94 0x82 # CHECK: r17:16 ^= asl(r21:20, #31) +0x48 0xff 0x11 0xde +# CHECK: r17 = and(#21, asl(r17, #31)) +0x4a 0xff 0x11 0xde +# CHECK: r17 = or(#21, asl(r17, #31)) +0x58 0xff 0x11 0xde +# CHECK: r17 = and(#21, lsr(r17, #31)) +0x5a 0xff 0x11 0xde +# CHECK: r17 = or(#21, lsr(r17, #31)) 0x11 0xdf 0x55 0x8e # CHECK: r17 &= asr(r21, #31) 0x31 0xdf 0x55 0x8e -- 2.7.4