From: Chuang-Yu Cheng Date: Mon, 28 Mar 2016 07:38:01 +0000 (+0000) Subject: [Power9] Implement new vsx instructions: quad-precision move, fp-arithmetic X-Git-Tag: llvmorg-3.9.0-rc1~10774 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5663848996b01faeb58276be25d5e364378fc1c9;p=platform%2Fupstream%2Fllvm.git [Power9] Implement new vsx instructions: quad-precision move, fp-arithmetic This change implements the following vsx instructions: - quad-precision move xscpsgnqp, xsabsqp, xsnegqp, xsnabsqp - quad-precision fp-arithmetic xsaddqp(o) xsdivqp(o) xsmulqp(o) xssqrtqp(o) xssubqp(o) xsmaddqp(o) xsmsubqp(o) xsnmaddqp(o) xsnmsubqp(o) 22 instructions Thanks Nemanja and Kit for careful review and invaluable discussion! Reviewers: hal, nemanja, kbarton, tjablin, amehsan http://reviews.llvm.org/D16110 llvm-svn: 264565 --- diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index e17bfd7..8b035e1 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -1819,6 +1819,55 @@ let Predicates = [HasP9Vector] in { : XX3Form; + // [PO VRT VRA VRB XO /] + class X_VT5_VA5_VB5 opcode, bits<10> xo, string opc, + list pattern> + : XForm_1; + + // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] + class X_VT5_VA5_VB5_Ro opcode, bits<10> xo, string opc, + list pattern> + : X_VT5_VA5_VB5, isDOT; + + //===--------------------------------------------------------------------===// + // Quad-Precision Scalar Move Instructions: + + // Copy Sign + def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp", []>; + + // Absolute/Negative-Absolute/Negate + def XSABSQP : X_VT5_XO5_VB5<63, 0, 804, "xsabsqp" , []>; + def XSNABSQP : X_VT5_XO5_VB5<63, 8, 804, "xsnabsqp", []>; + def XSNEGQP : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp" , []>; + + //===--------------------------------------------------------------------===// + // Quad-Precision Scalar Floating-Point Arithmetic Instructions: + + // 
Add/Divide/Multiply/Subtract + def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp" , []>; + def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo", []>; + def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp" , []>; + def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo", []>; + def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp" , []>; + def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo", []>; + def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" , []>; + def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo", []>; + + // Square-Root + def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp" , []>; + def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo", []>; + + // (Negative) Multiply-{Add/Subtract} + def XSMADDQP : X_VT5_VA5_VB5 <63, 388, "xsmaddqp" , []>; + def XSMADDQPO : X_VT5_VA5_VB5_Ro<63, 388, "xsmaddqpo" , []>; + def XSMSUBQP : X_VT5_VA5_VB5 <63, 420, "xsmsubqp" , []>; + def XSMSUBQPO : X_VT5_VA5_VB5_Ro<63, 420, "xsmsubqpo" , []>; + def XSNMADDQP : X_VT5_VA5_VB5 <63, 452, "xsnmaddqp" , []>; + def XSNMADDQPO: X_VT5_VA5_VB5_Ro<63, 452, "xsnmaddqpo", []>; + def XSNMSUBQP : X_VT5_VA5_VB5 <63, 484, "xsnmsubqp" , []>; + def XSNMSUBQPO: X_VT5_VA5_VB5_Ro<63, 484, "xsnmsubqpo", []>; + //===--------------------------------------------------------------------===// // Quad/Double-Precision Compare Instructions: diff --git a/llvm/lib/Target/PowerPC/README_P9.txt b/llvm/lib/Target/PowerPC/README_P9.txt index 5482d4a..a58f522 100644 --- a/llvm/lib/Target/PowerPC/README_P9.txt +++ b/llvm/lib/Target/PowerPC/README_P9.txt @@ -141,6 +141,128 @@ Altivec: VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>; VSX: +- QP Copy Sign: xscpsgnqp + . Similar to xscpsgndp + . (set f128:$vT, (fcopysign f128:$vB, f128:$vA)) + +- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp + . Similar to xsabsdp/xsnabsdp/xsnegdp + . 
(set f128:$vT, (fabs f128:$vB)) // xsabsqp + (set f128:$vT, (fneg (fabs f128:$vB))) // xsnabsqp + (set f128:$vT, (fneg f128:$vB)) // xsnegqp + +- QP Add/Divide/Multiply/Subtract/Square-Root: + xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp + . Similar to xsadddp + . isCommutable = 1 + (set f128:$vT, (fadd f128:$vA, f128:$vB)) // xsaddqp + (set f128:$vT, (fmul f128:$vA, f128:$vB)) // xsmulqp + + . isCommutable = 0 + (set f128:$vT, (fdiv f128:$vA, f128:$vB)) // xsdivqp + (set f128:$vT, (fsub f128:$vA, f128:$vB)) // xssubqp + (set f128:$vT, (fsqrt f128:$vB)) // xssqrtqp + +- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root: + xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo + . Similar to xsrsqrtedp?? + def XSRSQRTEDP : XX2Form<60, 74, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrsqrtedp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; + + . Define DAG Node in PPCInstrInfo.td: + def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>; + def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>; + def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>; + def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>; + def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>; + + DAG patterns of each instruction (PPCInstrVSX.td): + . isCommutable = 1 + (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB)) // xsaddqpo + (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB)) // xsmulqpo + + . isCommutable = 0 + (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB)) // xsdivqpo + (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB)) // xssubqpo + (set f128:$vT, (PPCfsqrtrto f128:$vB)) // xssqrtqpo + +- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp + . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp + + . 
isCommutable = 1 + // xsmaddqp + [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsmsubqp + [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmaddqp + [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmsubqp + [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + +- Round to Odd of QP (Negative) Multiply-{Add/Subtract}: + xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo + . Similar to xsrsqrtedp?? + + . Define DAG Node in PPCInstrInfo.td: + def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>; + + It looks like we only need to define "PPCfmarto" for these instructions, + because according to PowerISA_V3.0, these instructions perform RTO on + fma's result: + xsmaddqp(o) + v ← bfp_MULTIPLY_ADD(src1, src3, src2) + rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) + result ← bfp_CONVERT_TO_BFP128(rnd) + + xsmsubqp(o) + v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) + rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) + result ← bfp_CONVERT_TO_BFP128(rnd) + + xsnmaddqp(o) + v ← bfp_MULTIPLY_ADD(src1,src3,src2) + rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) + result ← bfp_CONVERT_TO_BFP128(rnd) + + xsnmsubqp(o) + v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) + rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) + result ← bfp_CONVERT_TO_BFP128(rnd) + + DAG patterns of each instruction (PPCInstrVSX.td): + . 
isCommutable = 1 + // xsmaddqpo + [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsmsubqpo + [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmaddqpo + [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmsubqpo + [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; - QP Compare Ordered/Unordered: xscmpoqp xscmpuqp . ref: XSCMPUDP diff --git a/llvm/test/MC/Disassembler/PowerPC/vsx.txt b/llvm/test/MC/Disassembler/PowerPC/vsx.txt index ed7893d..6159c3a 100644 --- a/llvm/test/MC/Disassembler/PowerPC/vsx.txt +++ b/llvm/test/MC/Disassembler/PowerPC/vsx.txt @@ -542,6 +542,72 @@ # Power9 Instructions: +# CHECK: xscpsgnqp 7, 31, 27 +0xfc 0xff 0xd8 0xc8 + +# CHECK: xsabsqp 7, 27 +0xfc 0xe0 0xde 0x48 + +# CHECK: xsnegqp 7, 27 +0xfc 0xf0 0xde 0x48 + +# CHECK: xsnabsqp 7, 27 +0xfc 0xe8 0xde 0x48 + +# CHECK: xsaddqp 7, 31, 27 +0xfc 0xff 0xd8 0x08 + +# CHECK: xsaddqpo 7, 31, 27 +0xfc 0xff 0xd8 0x09 + +# CHECK: xsdivqp 7, 31, 27 +0xfc 0xff 0xdc 0x48 + +# CHECK: xsdivqpo 7, 31, 27 +0xfc 0xff 0xdc 0x49 + +# CHECK: xsmulqp 7, 31, 27 +0xfc 0xff 0xd8 0x48 + +# CHECK: xsmulqpo 7, 31, 27 +0xfc 0xff 0xd8 0x49 + +# CHECK: xssqrtqp 7, 31 +0xfc 0xfb 0xfe 0x48 + +# CHECK: xssqrtqpo 7, 31 +0xfc 0xfb 0xfe 0x49 + +# CHECK: xssubqp 7, 31, 27 +0xfc 0xff 0xdc 0x08 + +# CHECK: xssubqpo 7, 31, 27 +0xfc 0xff 0xdc 0x09 + +# CHECK: xsmaddqp 7, 31, 27 +0xfc 0xff 0xdb 0x08 + +# CHECK: xsmaddqpo 7, 31, 27 +0xfc 0xff 0xdb 0x09 + +# CHECK: xsmsubqp 7, 31, 27 +0xfc 0xff 0xdb 0x48 + +# CHECK: xsmsubqpo 7, 31, 27 +0xfc 0xff 0xdb 0x49 + +# CHECK: xsnmaddqp 7, 31, 27 +0xfc 0xff 0xdb 0x88 + +# CHECK: xsnmaddqpo 7, 31, 27 +0xfc 0xff 0xdb 0x89 + +# CHECK: 
xsnmsubqp 7, 31, 27 +0xfc 0xff 0xdb 0xc8 + +# CHECK: xsnmsubqpo 7, 31, 27 +0xfc 0xff 0xdb 0xc9 + # CHECK: xscmpoqp 6, 31, 27 0xff 0x1f 0xd9 0x08 diff --git a/llvm/test/MC/PowerPC/vsx.s b/llvm/test/MC/PowerPC/vsx.s index ce341c3..a2f111e 100644 --- a/llvm/test/MC/PowerPC/vsx.s +++ b/llvm/test/MC/PowerPC/vsx.s @@ -550,6 +550,80 @@ # Power9 Instructions: +# Copy Sign +# CHECK-BE: xscpsgnqp 7, 31, 27 # encoding: [0xfc,0xff,0xd8,0xc8] +# CHECK-LE: xscpsgnqp 7, 31, 27 # encoding: [0xc8,0xd8,0xff,0xfc] + xscpsgnqp 7, 31, 27 + +# Absolute/Negative Absolute/Negate +# CHECK-BE: xsabsqp 7, 27 # encoding: [0xfc,0xe0,0xde,0x48] +# CHECK-LE: xsabsqp 7, 27 # encoding: [0x48,0xde,0xe0,0xfc] + xsabsqp 7, 27 +# CHECK-BE: xsnegqp 7, 27 # encoding: [0xfc,0xf0,0xde,0x48] +# CHECK-LE: xsnegqp 7, 27 # encoding: [0x48,0xde,0xf0,0xfc] + xsnegqp 7, 27 +# CHECK-BE: xsnabsqp 7, 27 # encoding: [0xfc,0xe8,0xde,0x48] +# CHECK-LE: xsnabsqp 7, 27 # encoding: [0x48,0xde,0xe8,0xfc] + xsnabsqp 7, 27 + +# Add/Divide/Multiply/Square-Root/Subtract +# CHECK-BE: xsaddqp 7, 31, 27 # encoding: [0xfc,0xff,0xd8,0x08] +# CHECK-LE: xsaddqp 7, 31, 27 # encoding: [0x08,0xd8,0xff,0xfc] + xsaddqp 7, 31, 27 +# CHECK-BE: xsaddqpo 7, 31, 27 # encoding: [0xfc,0xff,0xd8,0x09] +# CHECK-LE: xsaddqpo 7, 31, 27 # encoding: [0x09,0xd8,0xff,0xfc] + xsaddqpo 7, 31, 27 +# CHECK-BE: xsdivqp 7, 31, 27 # encoding: [0xfc,0xff,0xdc,0x48] +# CHECK-LE: xsdivqp 7, 31, 27 # encoding: [0x48,0xdc,0xff,0xfc] + xsdivqp 7, 31, 27 +# CHECK-BE: xsdivqpo 7, 31, 27 # encoding: [0xfc,0xff,0xdc,0x49] +# CHECK-LE: xsdivqpo 7, 31, 27 # encoding: [0x49,0xdc,0xff,0xfc] + xsdivqpo 7, 31, 27 +# CHECK-BE: xsmulqp 7, 31, 27 # encoding: [0xfc,0xff,0xd8,0x48] +# CHECK-LE: xsmulqp 7, 31, 27 # encoding: [0x48,0xd8,0xff,0xfc] + xsmulqp 7, 31, 27 +# CHECK-BE: xsmulqpo 7, 31, 27 # encoding: [0xfc,0xff,0xd8,0x49] +# CHECK-LE: xsmulqpo 7, 31, 27 # encoding: [0x49,0xd8,0xff,0xfc] + xsmulqpo 7, 31, 27 +# CHECK-BE: xssqrtqp 7, 31 # encoding: [0xfc,0xfb,0xfe,0x48] +# 
CHECK-LE: xssqrtqp 7, 31 # encoding: [0x48,0xfe,0xfb,0xfc] + xssqrtqp 7, 31 +# CHECK-BE: xssqrtqpo 7, 31 # encoding: [0xfc,0xfb,0xfe,0x49] +# CHECK-LE: xssqrtqpo 7, 31 # encoding: [0x49,0xfe,0xfb,0xfc] + xssqrtqpo 7, 31 +# CHECK-BE: xssubqp 7, 31, 27 # encoding: [0xfc,0xff,0xdc,0x08] +# CHECK-LE: xssubqp 7, 31, 27 # encoding: [0x08,0xdc,0xff,0xfc] + xssubqp 7, 31, 27 +# CHECK-BE: xssubqpo 7, 31, 27 # encoding: [0xfc,0xff,0xdc,0x09] +# CHECK-LE: xssubqpo 7, 31, 27 # encoding: [0x09,0xdc,0xff,0xfc] + xssubqpo 7, 31, 27 + +# (Negative) Multiply-Add/Subtract +# CHECK-BE: xsmaddqp 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0x08] +# CHECK-LE: xsmaddqp 7, 31, 27 # encoding: [0x08,0xdb,0xff,0xfc] + xsmaddqp 7, 31, 27 +# CHECK-BE: xsmaddqpo 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0x09] +# CHECK-LE: xsmaddqpo 7, 31, 27 # encoding: [0x09,0xdb,0xff,0xfc] + xsmaddqpo 7, 31, 27 +# CHECK-BE: xsmsubqp 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0x48] +# CHECK-LE: xsmsubqp 7, 31, 27 # encoding: [0x48,0xdb,0xff,0xfc] + xsmsubqp 7, 31, 27 +# CHECK-BE: xsmsubqpo 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0x49] +# CHECK-LE: xsmsubqpo 7, 31, 27 # encoding: [0x49,0xdb,0xff,0xfc] + xsmsubqpo 7, 31, 27 +# CHECK-BE: xsnmaddqp 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0x88] +# CHECK-LE: xsnmaddqp 7, 31, 27 # encoding: [0x88,0xdb,0xff,0xfc] + xsnmaddqp 7, 31, 27 +# CHECK-BE: xsnmaddqpo 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0x89] +# CHECK-LE: xsnmaddqpo 7, 31, 27 # encoding: [0x89,0xdb,0xff,0xfc] + xsnmaddqpo 7, 31, 27 +# CHECK-BE: xsnmsubqp 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0xc8] +# CHECK-LE: xsnmsubqp 7, 31, 27 # encoding: [0xc8,0xdb,0xff,0xfc] + xsnmsubqp 7, 31, 27 +# CHECK-BE: xsnmsubqpo 7, 31, 27 # encoding: [0xfc,0xff,0xdb,0xc9] +# CHECK-LE: xsnmsubqpo 7, 31, 27 # encoding: [0xc9,0xdb,0xff,0xfc] + xsnmsubqpo 7, 31, 27 + # Compare Ordered/Unordered # CHECK-BE: xscmpoqp 6, 31, 27 # encoding: [0xff,0x1f,0xd9,0x08] # CHECK-LE: xscmpoqp 6, 31, 27 # encoding: [0x08,0xd9,0x1f,0xff]