Add LXVKQ support.

author Michael Meissner <meissner@linux.ibm.com>

Wed, 15 Dec 2021 05:57:44 +0000 (00:57 -0500)

committer Michael Meissner <meissner@linux.ibm.com>

Wed, 15 Dec 2021 05:57:44 +0000 (00:57 -0500)
author Michael Meissner <meissner@linux.ibm.com>
Wed, 15 Dec 2021 05:57:44 +0000 (00:57 -0500)
committer Michael Meissner <meissner@linux.ibm.com>
Wed, 15 Dec 2021 05:57:44 +0000 (00:57 -0500)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md

index c8cff1a..e72132b 100644 (file)
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -213,6 +213,12 @@
    "A signed 34-bit integer constant if prefixed instructions are supported."
    (match_operand 0 "cint34_operand"))
  
+;; A TF/KF scalar constant or a vector constant whose value is one of the IEEE
+;; 128-bit constants that LXVKQ can load into a vector register.
+(define_constraint "eQ"
+  "An IEEE 128-bit constant that can be loaded into VSX registers."
+  (match_operand 0 "easy_vector_constant_ieee128"))
+
  ;; Floating-point constraints.  These two are defined so that insn
  ;; length attributes can be calculated exactly.
  
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md

index f216ffd..be72167 100644 (file)
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -601,6 +601,14 @@
    if (TARGET_VSX && op == CONST0_RTX (mode))
      return 1;
  
+  /* Constants that can be generated with ISA 3.1 instructions are easy.  */
+  vec_const_128bit_type vsx_const;
+  if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
+    {
+      if (constant_generates_lxvkq (&vsx_const) != 0)
+       return true;
+    }
+
    /* Otherwise consider floating point constants hard, so that the
       constant gets pushed to memory during the early RTL phases.  This
       has the advantage that double precision constants that can be
@@ -609,6 +617,23 @@
     return 0;
  })
  
+;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
+;; via the LXVKQ instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+  (match_code "const_vector,const_double")
+{
+  vec_const_128bit_type vsx_const;
+
+  /* Can we generate the LXVKQ instruction?  */
+  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+      || !TARGET_VSX)
+    return false;
+
+  return (vec_const_128bit_to_bytes (op, mode, &vsx_const)
+         && constant_generates_lxvkq (&vsx_const) != 0);
+})
+
  ;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
  ;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
  
@@ -653,6 +678,15 @@
        if (zero_constant (op, mode) || all_ones_constant (op, mode))
         return true;
  
+      /* Constants that can be generated with ISA 3.1 instructions are
+         easy.  */
+      vec_const_128bit_type vsx_const;
+      if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
+       {
+         if (constant_generates_lxvkq (&vsx_const) != 0)
+           return true;
+       }
+
        if (TARGET_P9_VECTOR
            && xxspltib_constant_p (op, mode, &num_insns, &value))
         return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h

index 81345d8..4a2e7fa 100644 (file)
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -250,6 +250,7 @@ typedef struct {
  
  extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
                                        vec_const_128bit_type *);
+extern unsigned constant_generates_lxvkq (vec_const_128bit_type *);
  #endif /* RTX_CODE */
  
  #ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c

index 819314d..0bc3844 100644 (file)
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6700,6 +6700,17 @@ output_vec_const_move (rtx *operands)
             gcc_unreachable ();
         }
  
+      vec_const_128bit_type vsx_const;
+      if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
+       {
+         unsigned imm = constant_generates_lxvkq (&vsx_const);
+         if (imm)
+           {
+             operands[2] = GEN_INT (imm);
+             return "lxvkq %x0,%2";
+           }
+       }
+
        if (TARGET_P9_VECTOR
           && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
         {
@@ -28587,6 +28598,57 @@ vec_const_128bit_to_bytes (rtx op,
    return true;
  }
  
+/* Determine if the 128-bit constant described by VSX_CONST can be loaded with
+   LXVKQ.  Return zero if the LXVKQ instruction cannot be used.  Otherwise
+   return the immediate value to be used with the LXVKQ instruction.  */
+
+unsigned
+constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
+{
+  /* LXVKQ needs -mieee128-constant (on by default), power10 code generation,
+     IEEE 128-bit floating point hardware, and VSX registers.  */
+  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+      || !TARGET_VSX)
+    return 0;
+
+  /* Every constant that LXVKQ can generate has words 1 through 3 equal to
+     zero, so only words[0] needs to be matched below.  */
+  if (vsx_const->words[1] != 0
+      || vsx_const->words[2] != 0
+      || vsx_const->words[3] != 0)
+      return 0;
+
+  /* Translate words[0] into the LXVKQ immediate, if it is a known pattern.  */
+  switch (vsx_const->words[0])
+    {
+    case 0x3FFF0000U: return 1;                /* IEEE 128-bit +1.0.  */
+    case 0x40000000U: return 2;                /* IEEE 128-bit +2.0.  */
+    case 0x40008000U: return 3;                /* IEEE 128-bit +3.0.  */
+    case 0x40010000U: return 4;                /* IEEE 128-bit +4.0.  */
+    case 0x40014000U: return 5;                /* IEEE 128-bit +5.0.  */
+    case 0x40018000U: return 6;                /* IEEE 128-bit +6.0.  */
+    case 0x4001C000U: return 7;                /* IEEE 128-bit +7.0.  */
+    case 0x7FFF0000U: return 8;                /* IEEE 128-bit +Infinity.  */
+    case 0x7FFF8000U: return 9;                /* IEEE 128-bit quiet NaN.  */
+    case 0x80000000U: return 16;       /* IEEE 128-bit -0.0.  */
+    case 0xBFFF0000U: return 17;       /* IEEE 128-bit -1.0.  */
+    case 0xC0000000U: return 18;       /* IEEE 128-bit -2.0.  */
+    case 0xC0008000U: return 19;       /* IEEE 128-bit -3.0.  */
+    case 0xC0010000U: return 20;       /* IEEE 128-bit -4.0.  */
+    case 0xC0014000U: return 21;       /* IEEE 128-bit -5.0.  */
+    case 0xC0018000U: return 22;       /* IEEE 128-bit -6.0.  */
+    case 0xC001C000U: return 23;       /* IEEE 128-bit -7.0.  */
+    case 0xFFFF0000U: return 24;       /* IEEE 128-bit -Infinity.  */
+
+      /* Anything else cannot be loaded with LXVKQ.  */
+    default:
+      break;
+    }
+
+  return 0;
+}
+
+\f
  struct gcc_target targetm = TARGET_INITIALIZER;
  
  #include "gt-rs6000.h"
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt

index 9d7878f..b7433ec 100644 (file)
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -640,6 +640,10 @@ mprivileged
  Target Var(rs6000_privileged) Init(0)
  Generate code that will run in privileged state.
  
+mieee128-constant
+Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
+Generate (do not generate) code that uses the LXVKQ instruction.
+
  -param=rs6000-density-pct-threshold=
  Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
  When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md

index 83d6c7b..de04840 100644 (file)
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1192,16 +1192,19 @@
  
  ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
  ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
+;;              LXVKQ
  ;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
  (define_insn "vsx_mov<mode>_64bit"
    [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                 "=ZwO,      wa,        wa,        r,         we,        ?wQ,
                  ?&r,       ??r,       ??Y,       <??r>,     wa,        v,
+                wa,
                  ?wa,       v,         <??r>,     wZ,        v")
  
         (match_operand:VSX_M 1 "input_operand" 
                 "wa,        ZwO,       wa,        we,        r,         r,
                  wQ,        Y,         r,         r,         wE,        jwM,
+                eQ,
                  ?jwM,      W,         <nW>,      v,         wZ"))]
  
    "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1213,35 +1216,43 @@
    [(set_attr "type"
                 "vecstore,  vecload,   vecsimple, mtvsr,     mfvsr,     load,
                  store,     load,      store,     *,         vecsimple, vecsimple,
+                vecperm,
                  vecsimple, *,         *,         vecstore,  vecload")
     (set_attr "num_insns"
                 "*,         *,         *,         2,         *,         2,
                  2,         2,         2,         2,         *,         *,
+                *,
                  *,         5,         2,         *,         *")
     (set_attr "max_prefixed_insns"
                 "*,         *,         *,         *,         *,         2,
                  2,         2,         2,         2,         *,         *,
+                *,
                  *,         *,         *,         *,         *")
     (set_attr "length"
                 "*,         *,         *,         8,         *,         8,
                  8,         8,         8,         8,         *,         *,
+                *,
                  *,         20,        8,         *,         *")
     (set_attr "isa"
                 "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
                  *,         *,         *,         *,         p9v,       *,
+                p10,
                  <VSisa>,   *,         *,         *,         *")])
  
  ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
+;;              LXVKQ
  ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const
  ;;              LVX (VMX)  STVX (VMX)
  (define_insn "*vsx_mov<mode>_32bit"
    [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                 "=ZwO,      wa,        wa,        ??r,       ??Y,       <??r>,
+                wa,
                  wa,        v,         ?wa,       v,         <??r>,
                  wZ,        v")
  
         (match_operand:VSX_M 1 "input_operand" 
                 "wa,        ZwO,       wa,        Y,         r,         r,
+                eQ,
                  wE,        jwM,       ?jwM,      W,         <nW>,
                  v,         wZ"))]
  
@@ -1253,14 +1264,17 @@
  }
    [(set_attr "type"
                 "vecstore,  vecload,   vecsimple, load,      store,    *,
+                vecperm,
                  vecsimple, vecsimple, vecsimple, *,         *,
                  vecstore,  vecload")
     (set_attr "length"
                 "*,         *,         *,         16,        16,        16,
+                *,
                  *,         *,         *,         20,        16,
                  *,         *")
     (set_attr "isa"
                 "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
+                p10,
                  p9v,       *,         <VSisa>,   *,         *,
                  *,         *")])
  
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi

index 8fd0f8d..69cb7e3 100644 (file)
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3336,6 +3336,10 @@ A constant whose negation is a signed 16-bit constant.
  @item eI
  A signed 34-bit integer constant if prefixed instructions are supported.
  
+@item eQ
+An IEEE 128-bit constant that can be loaded into a VSX register with
+the @code{lxvkq} instruction.
+
  @ifset INTERNALS
  @item G
  A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c

new file mode 100644 (file)

index 0000000..e3286a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+   constants.  */
+
+_Float128
+return_0 (void)
+{
+  return 0.0f128;                      /* XXSPLTIB 34,0.  */
+}
+
+_Float128
+return_1 (void)
+{
+  return 1.0f128;                      /* LXVKQ 34,1.  */
+}
+
+_Float128
+return_2 (void)
+{
+  return 2.0f128;                      /* LXVKQ 34,2.  */
+}
+
+_Float128
+return_3 (void)
+{
+  return 3.0f128;                      /* LXVKQ 34,3.  */
+}
+
+_Float128
+return_4 (void)
+{
+  return 4.0f128;                      /* LXVKQ 34,4.  */
+}
+
+_Float128
+return_5 (void)
+{
+  return 5.0f128;                      /* LXVKQ 34,5.  */
+}
+
+_Float128
+return_6 (void)
+{
+  return 6.0f128;                      /* LXVKQ 34,6.  */
+}
+
+_Float128
+return_7 (void)
+{
+  return 7.0f128;                      /* LXVKQ 34,7.  */
+}
+
+_Float128
+return_m0 (void)
+{
+  return -0.0f128;                     /* LXVKQ 34,16.  */
+}
+
+_Float128
+return_m1 (void)
+{
+  return -1.0f128;                     /* LXVKQ 34,17.  */
+}
+
+_Float128
+return_m2 (void)
+{
+  return -2.0f128;                     /* LXVKQ 34,18.  */
+}
+
+_Float128
+return_m3 (void)
+{
+  return -3.0f128;                     /* LXVKQ 34,19.  */
+}
+
+_Float128
+return_m4 (void)
+{
+  return -4.0f128;                     /* LXVKQ 34,20.  */
+}
+
+_Float128
+return_m5 (void)
+{
+  return -5.0f128;                     /* LXVKQ 34,21.  */
+}
+
+_Float128
+return_m6 (void)
+{
+  return -6.0f128;                     /* LXVKQ 34,22.  */
+}
+
+_Float128
+return_m7 (void)
+{
+  return -7.0f128;                     /* LXVKQ 34,23.  */
+}
+
+_Float128
+return_inf (void)
+{
+  return __builtin_inff128 ();         /* LXVKQ 34,8.  */
+}
+
+_Float128
+return_minf (void)
+{
+  return - __builtin_inff128 ();       /* LXVKQ 34,24.  */
+}
+
+_Float128
+return_nan (void)
+{
+  return __builtin_nanf128 ("");       /* LXVKQ 34,9.  */
+}
+
+/* Note, the following NaNs should not generate an LXVKQ instruction.  */
+_Float128
+return_mnan (void)
+{
+  return - __builtin_nanf128 ("");     /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+  return __builtin_nanf128 ("1");      /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+  return __builtin_nansf128 ("");      /* PLXV 34,... */
+}
+
+vector long long
+return_longlong_neg_0 (void)
+{
+  /* This vector has the same 128-bit pattern as -0.0f128 (sign bit only).  */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define FIRST  0x8000000000000000
+#define SECOND 0x0000000000000000
+
+#else
+#define FIRST  0x0000000000000000
+#define SECOND 0x8000000000000000
+#endif
+
+  return (vector long long) { FIRST, SECOND }; /* LXVKQ 34,16.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M}    19 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M}      3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M}  1 } } */
+
author	Michael Meissner <meissner@linux.ibm.com>
	Wed, 15 Dec 2021 05:57:44 +0000 (00:57 -0500)
committer	Michael Meissner <meissner@linux.ibm.com>
	Wed, 15 Dec 2021 05:57:44 +0000 (00:57 -0500)
gcc/config/rs6000/constraints.md		patch \| blob \| history
gcc/config/rs6000/predicates.md		patch \| blob \| history
gcc/config/rs6000/rs6000-protos.h		patch \| blob \| history
gcc/config/rs6000/rs6000.c		patch \| blob \| history
gcc/config/rs6000/rs6000.opt		patch \| blob \| history
gcc/config/rs6000/vsx.md		patch \| blob \| history
gcc/doc/md.texi		patch \| blob \| history
gcc/testsuite/gcc.target/powerpc/float128-constant.c	[new file with mode: 0644]	patch \| blob