Generate XXSPLTIDP for vectors on power10.

author Michael Meissner <meissner@linux.ibm.com>

Wed, 15 Dec 2021 07:02:24 +0000 (02:02 -0500)

committer Michael Meissner <meissner@linux.ibm.com>

Wed, 15 Dec 2021 07:02:24 +0000 (02:02 -0500)
author Michael Meissner <meissner@linux.ibm.com>
Wed, 15 Dec 2021 07:02:24 +0000 (02:02 -0500)
committer Michael Meissner <meissner@linux.ibm.com>
Wed, 15 Dec 2021 07:02:24 +0000 (02:02 -0500)
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md

index 886ace7..0d9f6a6 100644 (file)
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -610,6 +610,9 @@
  
        if (constant_generates_xxspltiw (&vsx_const))
         return true;
+
+      if (constant_generates_xxspltidp (&vsx_const))
+       return true;
      }
  
    /* Otherwise consider floating point constants hard, so that the
@@ -653,6 +656,9 @@
    if (constant_generates_xxspltiw (&vsx_const))
      return true;
  
+  if (constant_generates_xxspltidp (&vsx_const))
+    return true;
+
    return false;
  })
  
@@ -727,6 +733,9 @@
  
           if (constant_generates_xxspltiw (&vsx_const))
             return true;
+
+         if (constant_generates_xxspltidp (&vsx_const))
+           return true;
         }
  
        if (TARGET_P9_VECTOR
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h

index 74699ab..3e03d37 100644 (file)
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -253,6 +253,7 @@ extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
                                        vec_const_128bit_type *);
  extern unsigned constant_generates_lxvkq (vec_const_128bit_type *);
  extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *);
+extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *);
  #endif /* RTX_CODE */
  
  #ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c

index 5c1b620..e82a47f 100644 (file)
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6723,6 +6723,13 @@ output_vec_const_move (rtx *operands)
               operands[2] = GEN_INT (imm);
               return "xxspltiw %x0,%2";
             }
+
+         imm = constant_generates_xxspltidp (&vsx_const);
+         if (imm)
+           {
+             operands[2] = GEN_INT (imm);
+             return "xxspltidp %x0,%2";
+           }
         }
  
        if (TARGET_P9_VECTOR
@@ -26524,6 +26531,9 @@ prefixed_xxsplti_p (rtx_insn *insn)
      {
        if (constant_generates_xxspltiw (&vsx_const))
         return true;
+
+      if (constant_generates_xxspltidp (&vsx_const))
+       return true;
      }
  
    return false;
@@ -28731,6 +28741,104 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
    return vsx_const->words[0];
  }
  
+/* Determine if a vector constant can be loaded with XXSPLTIDP.  Return zero if
+   the XXSPLTIDP instruction cannot be used.  Otherwise return the immediate
+   value to be used with the XXSPLTIDP instruction.  */
+
+unsigned
+constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
+{
+  if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
+    return 0;
+
+  /* Reject if the two 64-bit segments are not the same.  */
+  if (!vsx_const->all_double_words_same)
+    return 0;
+
+  /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
+     Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW).  */
+  if (vsx_const->all_bytes_same
+      || vsx_const->all_half_words_same
+      || vsx_const->all_words_same)
+    return 0;
+
+  unsigned HOST_WIDE_INT value = vsx_const->double_words[0];
+
+  /* Avoid values that look like DFmode NaNs, except for the normal quiet NaN
+     bit pattern and the signaling NaN bit pattern.  Recognize infinity and
+     negative infinity.  */
+
+  /* Bit representation of DFmode normal quiet NaN.  */
+#define RS6000_CONST_DF_NAN    HOST_WIDE_INT_UC (0x7ff8000000000000)
+
+  /* Bit representation of DFmode normal signaling NaN.  */
+#define RS6000_CONST_DF_NANS   HOST_WIDE_INT_UC (0x7ff4000000000000)
+
+  /* Bit representation of DFmode positive infinity.  */
+#define RS6000_CONST_DF_INF    HOST_WIDE_INT_UC (0x7ff0000000000000)
+
+  /* Bit representation of DFmode negative infinity.  */
+#define RS6000_CONST_DF_NEG_INF        HOST_WIDE_INT_UC (0xfff0000000000000)
+
+  if (value != RS6000_CONST_DF_NAN
+      && value != RS6000_CONST_DF_NANS
+      && value != RS6000_CONST_DF_INF
+      && value != RS6000_CONST_DF_NEG_INF)
+    {
+      /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
+        the exponent, and 52 bits for the mantissa (not counting the hidden
+        bit used for normal numbers).  NaN values have the exponent set to all
+        1 bits, and the mantissa non-zero (mantissa == 0 is infinity).  */
+
+      int df_exponent = (value >> 52) & 0x7ff;
+      unsigned HOST_WIDE_INT
+       df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);
+
+      if (df_exponent == 0x7ff && df_mantissa != 0)    /* other NaNs.  */
+       return 0;
+
+      /* Avoid values that are DFmode subnormal values.  Subnormal numbers have
+        the exponent all 0 bits, and the mantissa non-zero.  If the value is
+        subnormal, then the hidden bit in the mantissa is not set.  */
+      if (df_exponent == 0 && df_mantissa != 0)                /* subnormal.  */
+       return 0;
+    }
+
+  /* Change the representation to DFmode constant.  */
+  long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };
+
+  /* real_from_target takes the target words in target order.  */
+  if (!BYTES_BIG_ENDIAN)
+    std::swap (df_words[0], df_words[1]);
+
+  REAL_VALUE_TYPE rv_type;
+  real_from_target (&rv_type, df_words, DFmode);
+
+  const REAL_VALUE_TYPE *rv = &rv_type;
+
+  /* Validate that the number can be stored as a SFmode value.  */
+  if (!exact_real_truncate (SFmode, rv))
+    return 0;
+
+  /* Validate that the number is not a SFmode subnormal value (exponent is 0,
+     mantissa field is non-zero) which is undefined for the XXSPLTIDP
+     instruction.  */
+  long sf_value;
+  real_to_target (&sf_value, rv, SFmode);
+
+  /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
+     and 23 bits for the mantissa.  Subnormal numbers have the exponent all
+     0 bits, and the mantissa non-zero.  */
+  long sf_exponent = (sf_value >> 23) & 0xFF;
+  long sf_mantissa = sf_value & 0x7FFFFF;
+
+  if (sf_exponent == 0 && sf_mantissa != 0)
+    return 0;
+
+  /* Return the immediate to be used.  */
+  return sf_value;
+}
+
  \f
  struct gcc_target targetm = TARGET_INITIALIZER;
  
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt

index ec7b106..c1d661d 100644 (file)
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -644,6 +644,10 @@ msplat-word-constant
  Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save
  Generate (do not generate) code that uses the XXSPLTIW instruction.
  
+msplat-float-constant
+Target Var(TARGET_SPLAT_FLOAT_CONSTANT) Init(1) Save
+Generate (do not generate) code that uses the XXSPLTIDP instruction.
+
  mieee128-constant
  Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
  Generate (do not generate) code that uses the LXVKQ instruction.
diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c

index bd1502b..dcb30e1 100644 (file)
--- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
@@ -24,11 +24,12 @@ vector signed long long splats4(void)
          return (vector signed long long) vec_sl(mzero, mzero);
  }
  
-/* Codegen will consist of splat and shift instructions for most types.
-   If folding is enabled, the vec_sl tests using vector long long type will
-   generate a lvx instead of a vspltisw+vsld pair.  */
+/* Codegen will consist of splat and shift instructions for most types.  If
+   folding is enabled, the vec_sl tests using vector long long type will
+   generate a vector load (lxv, plxv or lxvd2x) instead of a vspltisw+vsld
+   pair.  On power10, it will generate an xxspltidp instead of the load.  */
  
  /* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */
  /* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */
-/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M|\mxxspltidp\M} 2 } } */
  
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c

new file mode 100644 (file)

index 0000000..82ffc86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <math.h>
+
+/* Test generating V2DFmode constants with the ISA 3.1 (power10) XXSPLTIDP
+   instruction.  */
+
+vector double
+v2df_double_0 (void)
+{
+  return (vector double) { 0.0, 0.0 };                 /* XXSPLTIB or XXLXOR.  */
+}
+
+vector double
+v2df_double_1 (void)
+{
+  return (vector double) { 1.0, 1.0 };                 /* XXSPLTIDP.  */
+}
+
+#ifndef __FAST_MATH__
+vector double
+v2df_double_m0 (void)
+{
+  return (vector double) { -0.0, -0.0 };               /* XXSPLTIDP.  */
+}
+
+vector double
+v2df_double_nan (void)
+{
+  return (vector double) { __builtin_nan (""),
+                          __builtin_nan ("") };        /* XXSPLTIDP.  */
+}
+
+vector double
+v2df_double_inf (void)
+{
+  return (vector double) { __builtin_inf (),
+                          __builtin_inf () };          /* XXSPLTIDP.  */
+}
+
+vector double
+v2df_double_m_inf (void)
+{
+  return (vector double) { - __builtin_inf (),
+                          - __builtin_inf () };        /* XXSPLTIDP.  */
+}
+#endif
+
+vector double
+v2df_double_pi (void)
+{
+  return (vector double) { M_PI, M_PI };               /* PLXV.  */
+}
+
+vector double
+v2df_double_denorm (void)
+{
+  return (vector double) { (double)0x1p-149f,
+                          (double)0x1p-149f };         /* PLXV.  */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c

new file mode 100644 (file)

index 0000000..4d44f94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test generating V2DImode constants that have the same bit pattern as
+   V2DFmode constants that can be loaded with the XXSPLTIDP instruction with
+   the ISA 3.1 (power10).  */
+
+vector long long
+vector_0 (void)
+{
+  /* XXSPLTIB or XXLXOR.  */
+  return (vector long long) { 0LL, 0LL };
+}
+
+vector long long
+vector_1 (void)
+{
+  /* XXSPLTIB and VEXTSB2D.  */
+  return (vector long long) { 1LL, 1LL };
+}
+
+/* 0x8000000000000000LL is the bit pattern for -0.0, which can be generated
+   with XXSPLTIDP.  */
+vector long long
+vector_float_neg_0 (void)
+{
+  /* XXSPLTIDP.  */
+  return (vector long long) { 0x8000000000000000LL, 0x8000000000000000LL };
+}
+
+/* 0x3ff0000000000000LL is the bit pattern for 1.0 which can be generated with
+   XXSPLTIDP.  */
+vector long long
+vector_float_1_0 (void)
+{
+  /* XXSPLTIDP.  */
+  return (vector long long) { 0x3ff0000000000000LL, 0x3ff0000000000000LL };
+}
+
+/* 0x400921fb54442d18LL is the bit pattern for PI, which cannot be generated
+   with XXSPLTIDP.  */
+vector long long
+scalar_pi (void)
+{
+  /* PLXV.  */
+  return (vector long long) { 0x400921fb54442d18LL, 0x400921fb54442d18LL };
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltidp\M} 2 } } */
author	Michael Meissner <meissner@linux.ibm.com>
	Wed, 15 Dec 2021 07:02:24 +0000 (02:02 -0500)
committer	Michael Meissner <meissner@linux.ibm.com>
	Wed, 15 Dec 2021 07:02:24 +0000 (02:02 -0500)
gcc/config/rs6000/predicates.md		patch \| blob \| history
gcc/config/rs6000/rs6000-protos.h		patch \| blob \| history
gcc/config/rs6000/rs6000.c		patch \| blob \| history
gcc/config/rs6000/rs6000.opt		patch \| blob \| history
gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c		patch \| blob \| history
gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c	[new file with mode: 0644]	patch \| blob