From: Chuang-Yu Cheng <cycheng@multicorewareinc.com>
Date: Mon, 28 Mar 2016 07:38:01 +0000 (+0000)
Subject: [Power9] Implement new vsx instructions: quad-precision move, fp-arithmetic
X-Git-Tag: llvmorg-3.9.0-rc1~10774
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5663848996b01faeb58276be25d5e364378fc1c9;p=platform%2Fupstream%2Fllvm.git

[Power9] Implement new vsx instructions: quad-precision move, fp-arithmetic

This change implements the following vsx instructions:

- quad-precision move
    xscpsgnqp, xsabsqp, xsnegqp, xsnabsqp

- quad-precision fp-arithmetic
    xsaddqp(o) xsdivqp(o) xsmulqp(o) xssqrtqp(o) xssubqp(o)
    xsmaddqp(o) xsmsubqp(o) xsnmaddqp(o) xsnmsubqp(o)

22 instructions

Thanks Nemanja and Kit for careful review and invaluable discussion!
Reviewers: hal, nemanja, kbarton, tjablin, amehsan

http://reviews.llvm.org/D16110

llvm-svn: 264565
---

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index e17bfd7..8b035e1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1819,6 +1819,55 @@ let Predicates = [HasP9Vector] in {
     : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
               !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
 
+  // [PO VRT VRA VRB XO /]
+  class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
+                      list<dag> pattern>
+    : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB),
+              !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>;
+
+  // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
+  class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
+                         list<dag> pattern>
+    : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isDOT;
+
+  //===--------------------------------------------------------------------===//
+  // Quad-Precision Scalar Move Instructions:
+
+  // Copy Sign
+  def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp", []>;
+
+  // Absolute/Negative-Absolute/Negate
+  def XSABSQP   : X_VT5_XO5_VB5<63,  0, 804, "xsabsqp" , []>;
+  def XSNABSQP  : X_VT5_XO5_VB5<63,  8, 804, "xsnabsqp", []>;
+  def XSNEGQP   : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp" , []>;
+
+  //===--------------------------------------------------------------------===//
+  // Quad-Precision Scalar Floating-Point Arithmetic Instructions:
+
+  // Add/Divide/Multiply/Subtract
+  def XSADDQP   : X_VT5_VA5_VB5   <63,   4, "xsaddqp" , []>;
+  def XSADDQPO  : X_VT5_VA5_VB5_Ro<63,   4, "xsaddqpo", []>;
+  def XSDIVQP   : X_VT5_VA5_VB5   <63, 548, "xsdivqp" , []>;
+  def XSDIVQPO  : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo", []>;
+  def XSMULQP   : X_VT5_VA5_VB5   <63,  36, "xsmulqp" , []>;
+  def XSMULQPO  : X_VT5_VA5_VB5_Ro<63,  36, "xsmulqpo", []>;
+  def XSSUBQP   : X_VT5_VA5_VB5   <63, 516, "xssubqp" , []>;
+  def XSSUBQPO  : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo", []>;
+
+  // Square-Root
+  def XSSQRTQP  : X_VT5_XO5_VB5   <63, 27, 804, "xssqrtqp" , []>;
+  def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo", []>;
+
+  // (Negative) Multiply-{Add/Subtract}
+  def XSMADDQP  : X_VT5_VA5_VB5   <63, 388, "xsmaddqp"  , []>;
+  def XSMADDQPO : X_VT5_VA5_VB5_Ro<63, 388, "xsmaddqpo" , []>;
+  def XSMSUBQP  : X_VT5_VA5_VB5   <63, 420, "xsmsubqp"  , []>;
+  def XSMSUBQPO : X_VT5_VA5_VB5_Ro<63, 420, "xsmsubqpo" , []>;
+  def XSNMADDQP : X_VT5_VA5_VB5   <63, 452, "xsnmaddqp" , []>;
+  def XSNMADDQPO: X_VT5_VA5_VB5_Ro<63, 452, "xsnmaddqpo", []>;
+  def XSNMSUBQP : X_VT5_VA5_VB5   <63, 484, "xsnmsubqp" , []>;
+  def XSNMSUBQPO: X_VT5_VA5_VB5_Ro<63, 484, "xsnmsubqpo", []>;
+
   //===--------------------------------------------------------------------===//
   // Quad/Double-Precision Compare Instructions:
 
diff --git a/llvm/lib/Target/PowerPC/README_P9.txt b/llvm/lib/Target/PowerPC/README_P9.txt
index 5482d4a..a58f522 100644
--- a/llvm/lib/Target/PowerPC/README_P9.txt
+++ b/llvm/lib/Target/PowerPC/README_P9.txt
@@ -141,6 +141,128 @@ Altivec:
     VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>;
 
 VSX:
+- QP Copy Sign: xscpsgnqp
+  . Similar to xscpsgndp
+  . (set f128:$vT, (fcopysign f128:$vB, f128:$vA))
+
+- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp
+  . Similar to xsabsdp/xsnabsdp/xsnegdp
+  . (set f128:$vT, (fabs f128:$vB))             // xsabsqp
+    (set f128:$vT, (fneg (fabs f128:$vB)))      // xsnabsqp
+    (set f128:$vT, (fneg f128:$vB))             // xsnegqp
+
+- QP Add/Divide/Multiply/Subtract/Square-Root:
+  xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp
+  . Similar to xsadddp
+  . isCommutable = 1
+    (set f128:$vT, (fadd f128:$vA, f128:$vB))   // xsaddqp
+    (set f128:$vT, (fmul f128:$vA, f128:$vB))   // xsmulqp
+
+  . isCommutable = 0
+    (set f128:$vT, (fdiv f128:$vA, f128:$vB))   // xsdivqp
+    (set f128:$vT, (fsub f128:$vA, f128:$vB))   // xssubqp
+    (set f128:$vT, (fsqrt f128:$vB))            // xssqrtqp
+
+- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root:
+  xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo
+  . Similar to xsrsqrtedp??
+      def XSRSQRTEDP : XX2Form<60, 74,
+                               (outs vsfrc:$XT), (ins vsfrc:$XB),
+                               "xsrsqrtedp $XT, $XB", IIC_VecFP,
+                               [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
+
+  . Define DAG Node in PPCInstrInfo.td:
+    def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>;
+    def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>;
+    def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>;
+    def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>;
+    def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>;
+
+    DAG patterns of each instruction (PPCInstrVSX.td):
+    . isCommutable = 1
+      (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB))   // xsaddqpo
+      (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB))   // xsmulqpo
+
+    . isCommutable = 0
+      (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB))   // xsdivqpo
+      (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB))   // xssubqpo
+      (set f128:$vT, (PPCfsqrtrto f128:$vB))            // xssqrtqpo
+
+- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp
+  . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp
+
+  . isCommutable = 1
+    // xsmaddqp
+    [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>,
+    RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+    AltVSXFMARel;
+
+    // xsmsubqp
+    [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
+    RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+    AltVSXFMARel;
+
+    // xsnmaddqp
+    [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>,
+    RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+    AltVSXFMARel;
+
+    // xsnmsubqp
+    [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
+    RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+    AltVSXFMARel;
+
+- Round to Odd of QP (Negative) Multiply-{Add/Subtract}:
+  xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo
+  . Similar to xsrsqrtedp??
+
+  . Define DAG Node in PPCInstrInfo.td:
+    def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>;
+
+    It looks like we only need to define "PPCfmarto" for these instructions,
+    because according to PowerISA_V3.0, these instructions perform RTO on
+    fma's result:
+        xsmaddqp(o)
+        v      ← bfp_MULTIPLY_ADD(src1, src3, src2)
+        rnd    ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
+        result ← bfp_CONVERT_TO_BFP128(rnd)
+
+        xsmsubqp(o)
+        v      ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
+        rnd    ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
+        result ← bfp_CONVERT_TO_BFP128(rnd)
+
+        xsnmaddqp(o)
+        v      ← bfp_MULTIPLY_ADD(src1, src3, src2)
+        rnd    ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
+        result ← bfp_CONVERT_TO_BFP128(rnd)
+
+        xsnmsubqp(o)
+        v      ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
+        rnd    ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
+        result ← bfp_CONVERT_TO_BFP128(rnd)
+
+    DAG patterns of each instruction (PPCInstrVSX.td):
+    . isCommutable = 1
+      // xsmaddqpo
+      [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>,
+      RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+      AltVSXFMARel;
+
+      // xsmsubqpo
+      [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
+      RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+      AltVSXFMARel;
+
+      // xsnmaddqpo
+      [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>,
+      RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+      AltVSXFMARel;
+
+      // xsnmsubqpo
+      [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
+      RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+      AltVSXFMARel;
 
 - QP Compare Ordered/Unordered: xscmpoqp xscmpuqp
   . ref: XSCMPUDP
diff --git a/llvm/test/MC/Disassembler/PowerPC/vsx.txt b/llvm/test/MC/Disassembler/PowerPC/vsx.txt
index ed7893d..6159c3a 100644
--- a/llvm/test/MC/Disassembler/PowerPC/vsx.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/vsx.txt
@@ -542,6 +542,72 @@
 
 # Power9 Instructions:
 
+# CHECK: xscpsgnqp 7, 31, 27
+0xfc 0xff 0xd8 0xc8
+
+# CHECK: xsabsqp 7, 27
+0xfc 0xe0 0xde 0x48
+
+# CHECK: xsnegqp 7, 27
+0xfc 0xf0 0xde 0x48
+
+# CHECK: xsnabsqp 7, 27
+0xfc 0xe8 0xde 0x48
+
+# CHECK: xsaddqp 7, 31, 27
+0xfc 0xff 0xd8 0x08
+
+# CHECK: xsaddqpo 7, 31, 27
+0xfc 0xff 0xd8 0x09
+
+# CHECK: xsdivqp 7, 31, 27
+0xfc 0xff 0xdc 0x48
+
+# CHECK: xsdivqpo 7, 31, 27
+0xfc 0xff 0xdc 0x49
+
+# CHECK: xsmulqp 7, 31, 27
+0xfc 0xff 0xd8 0x48
+
+# CHECK: xsmulqpo 7, 31, 27
+0xfc 0xff 0xd8 0x49
+
+# CHECK: xssqrtqp 7, 31
+0xfc 0xfb 0xfe 0x48
+
+# CHECK: xssqrtqpo 7, 31
+0xfc 0xfb 0xfe 0x49
+
+# CHECK: xssubqp 7, 31, 27
+0xfc 0xff 0xdc 0x08
+
+# CHECK: xssubqpo 7, 31, 27
+0xfc 0xff 0xdc 0x09
+
+# CHECK: xsmaddqp 7, 31, 27
+0xfc 0xff 0xdb 0x08
+
+# CHECK: xsmaddqpo 7, 31, 27
+0xfc 0xff 0xdb 0x09
+
+# CHECK: xsmsubqp 7, 31, 27
+0xfc 0xff 0xdb 0x48
+
+# CHECK: xsmsubqpo 7, 31, 27
+0xfc 0xff 0xdb 0x49
+
+# CHECK: xsnmaddqp 7, 31, 27
+0xfc 0xff 0xdb 0x88
+
+# CHECK: xsnmaddqpo 7, 31, 27
+0xfc 0xff 0xdb 0x89
+
+# CHECK: xsnmsubqp 7, 31, 27
+0xfc 0xff 0xdb 0xc8
+
+# CHECK: xsnmsubqpo 7, 31, 27
+0xfc 0xff 0xdb 0xc9
+
 # CHECK: xscmpoqp 6, 31, 27
 0xff 0x1f 0xd9 0x08
 
diff --git a/llvm/test/MC/PowerPC/vsx.s b/llvm/test/MC/PowerPC/vsx.s
index ce341c3..a2f111e 100644
--- a/llvm/test/MC/PowerPC/vsx.s
+++ b/llvm/test/MC/PowerPC/vsx.s
@@ -550,6 +550,80 @@
 
 # Power9 Instructions:
 
+# Copy Sign
+# CHECK-BE: xscpsgnqp 7, 31, 27                # encoding: [0xfc,0xff,0xd8,0xc8]
+# CHECK-LE: xscpsgnqp 7, 31, 27                # encoding: [0xc8,0xd8,0xff,0xfc]
+            xscpsgnqp 7, 31, 27
+
+# Absolute/Negative Absolute/Negate
+# CHECK-BE: xsabsqp 7, 27                      # encoding: [0xfc,0xe0,0xde,0x48]
+# CHECK-LE: xsabsqp 7, 27                      # encoding: [0x48,0xde,0xe0,0xfc]
+            xsabsqp 7, 27
+# CHECK-BE: xsnegqp 7, 27                      # encoding: [0xfc,0xf0,0xde,0x48]
+# CHECK-LE: xsnegqp 7, 27                      # encoding: [0x48,0xde,0xf0,0xfc]
+            xsnegqp 7, 27
+# CHECK-BE: xsnabsqp 7, 27                     # encoding: [0xfc,0xe8,0xde,0x48]
+# CHECK-LE: xsnabsqp 7, 27                     # encoding: [0x48,0xde,0xe8,0xfc]
+            xsnabsqp 7, 27
+
+# Add/Divide/Multiply/Square-Root/Subtract
+# CHECK-BE: xsaddqp 7, 31, 27                  # encoding: [0xfc,0xff,0xd8,0x08]
+# CHECK-LE: xsaddqp 7, 31, 27                  # encoding: [0x08,0xd8,0xff,0xfc]
+            xsaddqp 7, 31, 27
+# CHECK-BE: xsaddqpo 7, 31, 27                 # encoding: [0xfc,0xff,0xd8,0x09]
+# CHECK-LE: xsaddqpo 7, 31, 27                 # encoding: [0x09,0xd8,0xff,0xfc]
+            xsaddqpo 7, 31, 27
+# CHECK-BE: xsdivqp 7, 31, 27                  # encoding: [0xfc,0xff,0xdc,0x48]
+# CHECK-LE: xsdivqp 7, 31, 27                  # encoding: [0x48,0xdc,0xff,0xfc]
+            xsdivqp 7, 31, 27
+# CHECK-BE: xsdivqpo 7, 31, 27                 # encoding: [0xfc,0xff,0xdc,0x49]
+# CHECK-LE: xsdivqpo 7, 31, 27                 # encoding: [0x49,0xdc,0xff,0xfc]
+            xsdivqpo 7, 31, 27
+# CHECK-BE: xsmulqp 7, 31, 27                  # encoding: [0xfc,0xff,0xd8,0x48]
+# CHECK-LE: xsmulqp 7, 31, 27                  # encoding: [0x48,0xd8,0xff,0xfc]
+            xsmulqp 7, 31, 27
+# CHECK-BE: xsmulqpo 7, 31, 27                 # encoding: [0xfc,0xff,0xd8,0x49]
+# CHECK-LE: xsmulqpo 7, 31, 27                 # encoding: [0x49,0xd8,0xff,0xfc]
+            xsmulqpo 7, 31, 27
+# CHECK-BE: xssqrtqp 7, 31                     # encoding: [0xfc,0xfb,0xfe,0x48]
+# CHECK-LE: xssqrtqp 7, 31                     # encoding: [0x48,0xfe,0xfb,0xfc]
+            xssqrtqp 7, 31
+# CHECK-BE: xssqrtqpo 7, 31                    # encoding: [0xfc,0xfb,0xfe,0x49]
+# CHECK-LE: xssqrtqpo 7, 31                    # encoding: [0x49,0xfe,0xfb,0xfc]
+            xssqrtqpo 7, 31
+# CHECK-BE: xssubqp 7, 31, 27                  # encoding: [0xfc,0xff,0xdc,0x08]
+# CHECK-LE: xssubqp 7, 31, 27                  # encoding: [0x08,0xdc,0xff,0xfc]
+            xssubqp 7, 31, 27
+# CHECK-BE: xssubqpo 7, 31, 27                 # encoding: [0xfc,0xff,0xdc,0x09]
+# CHECK-LE: xssubqpo 7, 31, 27                 # encoding: [0x09,0xdc,0xff,0xfc]
+            xssubqpo 7, 31, 27
+
+# (Negative) Multiply-Add/Subtract
+# CHECK-BE: xsmaddqp 7, 31, 27                 # encoding: [0xfc,0xff,0xdb,0x08]
+# CHECK-LE: xsmaddqp 7, 31, 27                 # encoding: [0x08,0xdb,0xff,0xfc]
+            xsmaddqp 7, 31, 27
+# CHECK-BE: xsmaddqpo 7, 31, 27                # encoding: [0xfc,0xff,0xdb,0x09]
+# CHECK-LE: xsmaddqpo 7, 31, 27                # encoding: [0x09,0xdb,0xff,0xfc]
+            xsmaddqpo 7, 31, 27
+# CHECK-BE: xsmsubqp 7, 31, 27                 # encoding: [0xfc,0xff,0xdb,0x48]
+# CHECK-LE: xsmsubqp 7, 31, 27                 # encoding: [0x48,0xdb,0xff,0xfc]
+            xsmsubqp 7, 31, 27
+# CHECK-BE: xsmsubqpo 7, 31, 27                # encoding: [0xfc,0xff,0xdb,0x49]
+# CHECK-LE: xsmsubqpo 7, 31, 27                # encoding: [0x49,0xdb,0xff,0xfc]
+            xsmsubqpo 7, 31, 27
+# CHECK-BE: xsnmaddqp 7, 31, 27                # encoding: [0xfc,0xff,0xdb,0x88]
+# CHECK-LE: xsnmaddqp 7, 31, 27                # encoding: [0x88,0xdb,0xff,0xfc]
+            xsnmaddqp 7, 31, 27
+# CHECK-BE: xsnmaddqpo 7, 31, 27               # encoding: [0xfc,0xff,0xdb,0x89]
+# CHECK-LE: xsnmaddqpo 7, 31, 27               # encoding: [0x89,0xdb,0xff,0xfc]
+            xsnmaddqpo 7, 31, 27
+# CHECK-BE: xsnmsubqp 7, 31, 27                # encoding: [0xfc,0xff,0xdb,0xc8]
+# CHECK-LE: xsnmsubqp 7, 31, 27                # encoding: [0xc8,0xdb,0xff,0xfc]
+            xsnmsubqp 7, 31, 27
+# CHECK-BE: xsnmsubqpo 7, 31, 27               # encoding: [0xfc,0xff,0xdb,0xc9]
+# CHECK-LE: xsnmsubqpo 7, 31, 27               # encoding: [0xc9,0xdb,0xff,0xfc]
+            xsnmsubqpo 7, 31, 27
+
 # Compare Ordered/Unordered
 # CHECK-BE: xscmpoqp 6, 31, 27                 # encoding: [0xff,0x1f,0xd9,0x08]
 # CHECK-LE: xscmpoqp 6, 31, 27                 # encoding: [0x08,0xd9,0x1f,0xff]