vsx.md (VSX_EXTRACT_FL): New iterator for all binary floating point types supported...

author Michael Meissner <meissner@linux.vnet.ibm.com>

Tue, 1 Nov 2016 00:41:30 +0000 (00:41 +0000)

committer Michael Meissner <meissner@gcc.gnu.org>

Tue, 1 Nov 2016 00:41:30 +0000 (00:41 +0000)
author Michael Meissner <meissner@linux.vnet.ibm.com>
Tue, 1 Nov 2016 00:41:30 +0000 (00:41 +0000)
committer Michael Meissner <meissner@gcc.gnu.org>
Tue, 1 Nov 2016 00:41:30 +0000 (00:41 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index d67a20f..7dd239d 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,24 @@
+2016-10-31  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+       * config/rs6000/vsx.md (VSX_EXTRACT_FL): New iterator for all
+       binary floating point types supported by the hardware except for
+       double.
+       (vsx_xvcvsxwdp_df): Provide scalar result alternative to the
+       vector instruction for optimizing extracting a SImode from a
+       V4SImode vector and converting it to floating point.
+       (vsx_xvcvuxwdp_df): Likewise.
+       (vsx_extract_si): On ISA 3.0, allow extract target and temporary
+       registers to be any VSX register.  Move stores to the end of the
+       constraints.
+       (vsx_extract_si_<uns>float_df): New combiner pattern and splitter
+       to optimize extracting a SImode from a V4SImode vector and
+       converting it to a binary floating point type supported by the
+       hardware.  Use the vector converts instead of extracting the
+       element, sign extending it, and then converting it to double.
+       Other floating point types than double first convert to double,
+       then the double is converted to that type.
+       (vsx_extract_si_<uns>float_<mode>): Likewise.
+
  2016-10-31  Andrew Pinski  <apinski@cavium.com>
  
         * config/aarch64/driver-aarch64.c (host_detect_local_cpu):
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md

index 18f3e86..505c270 100644 (file)
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -288,6 +288,16 @@
                           (V8HI  "v")
                           (V4SI  "wa")])
  
+;; Mode iterator for binary floating types other than double to
+;; optimize convert to that floating point type from an extract
+;; of an integer type
+(define_mode_iterator VSX_EXTRACT_FL [SF
+                                     (IF "FLOAT128_2REG_P (IFmode)")
+                                     (KF "TARGET_FLOAT128_HW")
+                                     (TF "FLOAT128_2REG_P (TFmode)
+                                          || (FLOAT128_IEEE_P (TFmode)
+                                              && TARGET_FLOAT128_HW)")])
+
  ;; Iterator for the 2 short vector types to do a splat from an integer
  (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
  
@@ -1907,6 +1917,7 @@
    [(set_attr "type" "vecdouble")])
  
  ;; Convert from 32-bit to 64-bit types
+;; Provide both vector and scalar targets
  (define_insn "vsx_xvcvsxwdp"
    [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
@@ -1915,6 +1926,14 @@
    "xvcvsxwdp %x0,%x1"
    [(set_attr "type" "vecdouble")])
  
+(define_insn "vsx_xvcvsxwdp_df"
+  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
+       (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+                  UNSPEC_VSX_CVSXWDP))]
+  "TARGET_VSX"
+  "xvcvsxwdp %x0,%x1"
+  [(set_attr "type" "vecdouble")])
+
  (define_insn "vsx_xvcvuxwdp"
    [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
         (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
@@ -1923,6 +1942,14 @@
    "xvcvuxwdp %x0,%x1"
    [(set_attr "type" "vecdouble")])
  
+(define_insn "vsx_xvcvuxwdp_df"
+  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
+       (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+                  UNSPEC_VSX_CVUXWDP))]
+  "TARGET_VSX"
+  "xvcvuxwdp %x0,%x1"
+  [(set_attr "type" "vecdouble")])
+
  (define_insn "vsx_xvcvspsxds"
    [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
         (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
@@ -2574,11 +2601,11 @@
    [(set_attr "type" "vecsimple")])
  
  (define_insn_and_split  "*vsx_extract_si"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,Z,Z,wJwK")
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
         (vec_select:SI
-        (match_operand:V4SI 1 "gpc_reg_operand" "v,wJwK,v,v")
-        (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
-   (clobber (match_scratch:V4SI 3 "=v,wJwK,v,v"))]
+        (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
+        (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
+   (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
    "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
    "#"
    "&& reload_completed"
@@ -2628,7 +2655,7 @@
  
    DONE;
  }
-  [(set_attr "type" "mftgpr,fpstore,fpstore,vecsimple")
+  [(set_attr "type" "mftgpr,vecperm,fpstore")
     (set_attr "length" "8")])
  
  (define_insn_and_split  "*vsx_extract_<mode>_p8"
@@ -2714,6 +2741,107 @@
    DONE;
  })
  
+;; VSX_EXTRACT optimizations
+;; Optimize double d = (double) vec_extract (vi, <n>)
+;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP
+(define_insn_and_split "*vsx_extract_si_<uns>float_df"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
+       (any_float:DF
+        (vec_select:SI
+         (match_operand:V4SI 1 "gpc_reg_operand" "v")
+         (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx element = operands[2];
+  rtx v4si_tmp = operands[3];
+  int value;
+
+  if (!VECTOR_ELT_ORDER_BIG)
+    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
+
+  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
+     instruction.  */
+  value = INTVAL (element);
+  if (value != 0)
+    {
+      if (GET_CODE (v4si_tmp) == SCRATCH)
+       v4si_tmp = gen_reg_rtx (V4SImode);
+      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
+    }
+  else
+    v4si_tmp = src;
+
+  emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
+  DONE;
+})
+
+;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
+;; where <type> is a floating point type that is supported by the hardware that is
+;; not double.  First convert the value to double, and then to the desired
+;; type.
+(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
+  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
+       (any_float:VSX_EXTRACT_FL
+        (vec_select:SI
+         (match_operand:V4SI 1 "gpc_reg_operand" "v")
+         (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
+   (clobber (match_scratch:V4SI 3 "=v"))
+   (clobber (match_scratch:DF 4 "=ws"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx element = operands[2];
+  rtx v4si_tmp = operands[3];
+  rtx df_tmp = operands[4];
+  int value;
+
+  if (!VECTOR_ELT_ORDER_BIG)
+    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
+
+  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
+     instruction.  */
+  value = INTVAL (element);
+  if (value != 0)
+    {
+      if (GET_CODE (v4si_tmp) == SCRATCH)
+       v4si_tmp = gen_reg_rtx (V4SImode);
+      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
+    }
+  else
+    v4si_tmp = src;
+
+  if (GET_CODE (df_tmp) == SCRATCH)
+    df_tmp = gen_reg_rtx (DFmode);
+
+  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
+
+  if (<MODE>mode == SFmode)
+    emit_insn (gen_truncdfsf2 (dest, df_tmp));
+  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
+    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
+  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
+          && TARGET_FLOAT128_HW)
+    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
+  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
+    emit_insn (gen_extenddfif2 (dest, df_tmp));
+  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
+    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
+  else
+    gcc_unreachable ();
+
+  DONE;
+})
+
  ;; Expanders for builtins
  (define_expand "vsx_mergel_<mode>"
    [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 36b4d8e..aed8a66 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-10-31  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+       * gcc.target/powerpc/vsx-extract-4.c: New test.
+       * gcc.target/powerpc/vsx-extract-5.c: Likewise.
+
  2016-10-31  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
  
         PR fortran/54679
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-extract-4.c b/gcc/testsuite/gcc.target/powerpc/vsx-extract-4.c

new file mode 100644 (file)

index 0000000..3b498f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-extract-4.c
@@ -0,0 +1,76 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power8" } */
+
+/* { dg-final { scan-assembler-times "vspltw"    6 } } */
+/* { dg-final { scan-assembler-times "xvcvsxwdp" 4 } } */
+/* { dg-final { scan-assembler-times "xvcvuxwdp" 4 } } */
+/* { dg-final { scan-assembler-not   "mtvsrd"      } } */
+/* { dg-final { scan-assembler-not   "mtvsrwa"     } } */
+/* { dg-final { scan-assembler-not   "mtvsrwz"     } } */
+/* { dg-final { scan-assembler-not   "mfvsrd"      } } */
+/* { dg-final { scan-assembler-not   "mfvsrwz"     } } */
+
+#include <altivec.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif
+
+TYPE
+foo_0s (vector int v)
+{
+  int i = vec_extract (v, 0);
+  return (TYPE) i;
+}
+
+TYPE
+foo_1s (vector int v)
+{
+  int i = vec_extract (v, 1);
+  return (TYPE) i;
+}
+
+TYPE
+foo_2s (vector int v)
+{
+  int i = vec_extract (v, 2);
+  return (TYPE) i;
+}
+
+TYPE
+foo_3s (vector int v)
+{
+  int i = vec_extract (v, 3);
+  return (TYPE) i;
+}
+
+TYPE
+foo_0u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 0);
+  return (TYPE) u;
+}
+
+TYPE
+foo_1u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 1);
+  return (TYPE) u;
+}
+
+TYPE
+foo_2u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 2);
+  return (TYPE) u;
+}
+
+TYPE
+foo_3u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 3);
+  return (TYPE) u;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-extract-5.c b/gcc/testsuite/gcc.target/powerpc/vsx-extract-5.c

new file mode 100644 (file)

index 0000000..1338c6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-extract-5.c
@@ -0,0 +1,77 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power8" } */
+
+/* { dg-final { scan-assembler-times "vspltw"      6 } } */
+/* { dg-final { scan-assembler-times "xvcvsxwdp"   4 } } */
+/* { dg-final { scan-assembler-times "xvcvuxwdp"   4 } } */
+/* { dg-final { scan-assembler-times "frsp\|xsrsp" 8 } } */
+/* { dg-final { scan-assembler-not   "mtvsrd"        } } */
+/* { dg-final { scan-assembler-not   "mtvsrwa"       } } */
+/* { dg-final { scan-assembler-not   "mtvsrwz"       } } */
+/* { dg-final { scan-assembler-not   "mfvsrd"        } } */
+/* { dg-final { scan-assembler-not   "mfvsrwz"       } } */
+
+#include <altivec.h>
+
+#ifndef TYPE
+#define TYPE float
+#endif
+
+TYPE
+foo_0s (vector int v)
+{
+  int i = vec_extract (v, 0);
+  return (TYPE) i;
+}
+
+TYPE
+foo_1s (vector int v)
+{
+  int i = vec_extract (v, 1);
+  return (TYPE) i;
+}
+
+TYPE
+foo_2s (vector int v)
+{
+  int i = vec_extract (v, 2);
+  return (TYPE) i;
+}
+
+TYPE
+foo_3s (vector int v)
+{
+  int i = vec_extract (v, 3);
+  return (TYPE) i;
+}
+
+TYPE
+foo_0u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 0);
+  return (TYPE) u;
+}
+
+TYPE
+foo_1u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 1);
+  return (TYPE) u;
+}
+
+TYPE
+foo_2u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 2);
+  return (TYPE) u;
+}
+
+TYPE
+foo_3u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 3);
+  return (TYPE) u;
+}
author	Michael Meissner <meissner@linux.vnet.ibm.com>
	Tue, 1 Nov 2016 00:41:30 +0000 (00:41 +0000)
committer	Michael Meissner <meissner@gcc.gnu.org>
	Tue, 1 Nov 2016 00:41:30 +0000 (00:41 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/rs6000/vsx.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/powerpc/vsx-extract-4.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/powerpc/vsx-extract-5.c	[new file with mode: 0644]	patch \| blob