cfgloop.h (struct loop): Add simdlen member.

author Jakub Jelinek <jakub@redhat.com>

Mon, 20 May 2019 09:49:07 +0000 (11:49 +0200)

committer Jakub Jelinek <jakub@gcc.gnu.org>

Mon, 20 May 2019 09:49:07 +0000 (11:49 +0200)
author Jakub Jelinek <jakub@redhat.com>
Mon, 20 May 2019 09:49:07 +0000 (11:49 +0200)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Mon, 20 May 2019 09:49:07 +0000 (11:49 +0200)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index e07f8a1..682d5f7 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,34 @@
+2019-05-20  Jakub Jelinek  <jakub@redhat.com>
+
+       * cfgloop.h (struct loop): Add simdlen member.
+       * cfgloopmanip.c (copy_loop_info): Copy simdlen as well.
+       * omp-expand.c (expand_omp_simd): Set it if simdlen clause is present.
+       * tree-vect-loop.c (vect_analyze_loop): Pass loop->simdlen != 0
+       as new argument to autovectorize_vector_sizes target hook.  If
+       loop->simdlen, pick up vector size where the vectorization factor
+       is equal to loop->simd, and if there is none, fall back to the first
+       successful one.
+       (vect_transform_loop): Adjust autovectorize_vector_sizes target hook
+       caller.
+       * omp-low.c (omp_clause_aligned_alignment): Likewise.
+       * omp-general.c (omp_max_vf): Likewise.
+       * optabs-query.c (can_vec_mask_load_store_p): Likewise.
+       * tree-vect-slp.c (vect_slp_bb): Likewise.
+       * target.def (autovectorize_vector_sizes): Add ALL argument and
+       document it.
+       * doc/tm.texi: Adjust documentation.
+       * targhooks.c (default_autovectorize_vector_sizes): Add bool argument.
+       * targhooks.h (default_autovectorize_vector_sizes): Likewise.
+       * config/aarch64/aarch64.c (aarch64_autovectorize_vector_sizes): Add
+       bool argument.
+       * config/arc/arc.c (arc_autovectorize_vector_sizes): Likewise.
+       * config/arm/arm.c (arm_autovectorize_vector_sizes): Likewise.
+       * config/mips/mips.c (mips_autovectorize_vector_sizes): Likewise.
+       * config/i386/i386.c (ix86_autovectorize_vector_sizes): Likewise.  If
+       true and TARGET_AVX512F or TARGET_AVX, push 3 or 2 sizes even if
+       preferred vector size is not 512-bit or 256-bit, just put those
+       unpreferred ones last.
+
  2019-05-20  Martin Liska  <mliska@suse.cz>
  
         * targhooks.c (default_libc_has_fast_function): New function.
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h

index e82cd7a..2f8ab10 100644 (file)
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -174,6 +174,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
       of the loop can be safely evaluated concurrently.  */
    int safelen;
  
+  /* Preferred vectorization factor for the loop if non-zero.  */
+  int simdlen;
+
    /* Constraints are generally set by consumers and affect certain
       semantics of niter analyzer APIs.  Currently the APIs affected are
       number_of_iterations_exit* functions and their callers.  One typical
diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c

index bfee48e..50250ec 100644 (file)
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -1016,6 +1016,7 @@ copy_loop_info (struct loop *loop, struct loop *target)
    target->nb_iterations_estimate = loop->nb_iterations_estimate;
    target->estimate_state = loop->estimate_state;
    target->safelen = loop->safelen;
+  target->simdlen = loop->simdlen;
    target->constraints = loop->constraints;
    target->can_be_parallel = loop->can_be_parallel;
    target->warned_aggressive_loop_optimizations
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index 971c4d0..8a290dc 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14109,7 +14109,7 @@ aarch64_preferred_simd_mode (scalar_mode mode)
  /* Return a list of possible vector sizes for the vectorizer
     to iterate over.  */
  static void
-aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
+aarch64_autovectorize_vector_sizes (vector_sizes *sizes, bool)
  {
    if (TARGET_SVE)
      sizes->safe_push (BYTES_PER_SVE_VECTOR);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c

index 1633d01..bce1899 100644 (file)
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -480,7 +480,7 @@ arc_preferred_simd_mode (scalar_mode mode)
     TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES.  */
  
  static void
-arc_autovectorize_vector_sizes (vector_sizes *sizes)
+arc_autovectorize_vector_sizes (vector_sizes *sizes, bool)
  {
    if (TARGET_PLUS_QMACW)
      {
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

index 1d3be26..e3e71ea 100644 (file)
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -288,7 +288,7 @@ static bool arm_builtin_support_vector_misalignment (machine_mode mode,
  static void arm_conditional_register_usage (void);
  static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
  static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
-static void arm_autovectorize_vector_sizes (vector_sizes *);
+static void arm_autovectorize_vector_sizes (vector_sizes *, bool);
  static int arm_default_branch_cost (bool, bool);
  static int arm_cortex_a5_branch_cost (bool, bool);
  static int arm_cortex_m_branch_cost (bool, bool);
@@ -28351,7 +28351,7 @@ arm_vector_alignment (const_tree type)
  }
  
  static void
-arm_autovectorize_vector_sizes (vector_sizes *sizes)
+arm_autovectorize_vector_sizes (vector_sizes *sizes, bool)
  {
    if (!TARGET_NEON_VECTORIZE_DOUBLE)
      {
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

index 384c633..696a474 100644 (file)
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21332,7 +21332,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
     256bit and 128bit vectors.  */
  
  static void
-ix86_autovectorize_vector_sizes (vector_sizes *sizes)
+ix86_autovectorize_vector_sizes (vector_sizes *sizes, bool all)
  {
    if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
      {
@@ -21340,11 +21340,22 @@ ix86_autovectorize_vector_sizes (vector_sizes *sizes)
        sizes->safe_push (32);
        sizes->safe_push (16);
      }
+  else if (TARGET_AVX512F && all)
+    {
+      sizes->safe_push (32);
+      sizes->safe_push (16);
+      sizes->safe_push (64);
+    }
    else if (TARGET_AVX && !TARGET_PREFER_AVX128)
      {
        sizes->safe_push (32);
        sizes->safe_push (16);
      }
+  else if (TARGET_AVX && all)
+    {
+      sizes->safe_push (16);
+      sizes->safe_push (32);
+    }
  }
  
  /* Implemenation of targetm.vectorize.get_mask_mode.  */
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c

index 42cafed..6eafe3d 100644 (file)
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -13460,7 +13460,7 @@ mips_preferred_simd_mode (scalar_mode mode)
  /* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES.  */
  
  static void
-mips_autovectorize_vector_sizes (vector_sizes *sizes)
+mips_autovectorize_vector_sizes (vector_sizes *sizes, bool)
  {
    if (ISA_HAS_MSA)
      sizes->safe_push (16);
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi

index 0941039..622e8cf 100644 (file)
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6021,11 +6021,13 @@ against lower halves of vectors recursively until the specified mode is
  reached.  The default is @var{mode} which means no splitting.
  @end deftypefn
  
-@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes})
+@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes}, bool @var{all})
  If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not
  the only one that is worth considering, this hook should add all suitable
  vector sizes to @var{sizes}, in order of decreasing preference.  The first
  one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
+If @var{all} is true, add suitable vector sizes even when they are generally
+not expected to be worthwhile.
  
  The hook does not need to do anything if the vector returned by
  @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c

index 7415973..0d7f104 100644 (file)
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -4974,6 +4974,13 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
           && loop->safelen > 1)
         {
           loop->force_vectorize = true;
+         if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
+           {
+             unsigned HOST_WIDE_INT v
+               = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
+             if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
+               loop->simdlen = v;
+           }
           cfun->has_force_vectorize_loops = true;
         }
        else if (dont_vectorize)
diff --git a/gcc/omp-general.c b/gcc/omp-general.c

index 82f0a04..4a9b15c 100644 (file)
--- a/gcc/omp-general.c
+++ b/gcc/omp-general.c
@@ -469,7 +469,7 @@ omp_max_vf (void)
      return 1;
  
    auto_vector_sizes sizes;
-  targetm.vectorize.autovectorize_vector_sizes (&sizes);
+  targetm.vectorize.autovectorize_vector_sizes (&sizes, true);
    if (!sizes.is_empty ())
      {
        poly_uint64 vf = 0;
diff --git a/gcc/omp-low.c b/gcc/omp-low.c

index 04fc5f6..26ee70d 100644 (file)
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -3600,7 +3600,7 @@ omp_clause_aligned_alignment (tree clause)
    unsigned int al = 1;
    opt_scalar_mode mode_iter;
    auto_vector_sizes sizes;
-  targetm.vectorize.autovectorize_vector_sizes (&sizes);
+  targetm.vectorize.autovectorize_vector_sizes (&sizes, true);
    poly_uint64 vs = 0;
    for (unsigned int i = 0; i < sizes.length (); ++i)
      vs = ordered_max (vs, sizes[i]);
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c

index 71c73fb..04c8d08 100644 (file)
--- a/gcc/optabs-query.c
+++ b/gcc/optabs-query.c
@@ -593,7 +593,7 @@ can_vec_mask_load_store_p (machine_mode mode,
      return true;
  
    auto_vector_sizes vector_sizes;
-  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, true);
    for (unsigned int i = 0; i < vector_sizes.length (); ++i)
      {
        poly_uint64 cur = vector_sizes[i];
diff --git a/gcc/target.def b/gcc/target.def

index 23e260c..7d52102 100644 (file)
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1899,12 +1899,14 @@ DEFHOOK
  the only one that is worth considering, this hook should add all suitable\n\
  vector sizes to @var{sizes}, in order of decreasing preference.  The first\n\
  one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.\n\
+If @var{all} is true, add suitable vector sizes even when they are generally\n\
+not expected to be worthwhile.\n\
  \n\
  The hook does not need to do anything if the vector returned by\n\
  @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant\n\
  for autovectorization.  The default implementation does nothing.",
   void,
- (vector_sizes *sizes),
+ (vector_sizes *sizes, bool all),
   default_autovectorize_vector_sizes)
  
  /* Function to get a target mode for a vector mask.  */
diff --git a/gcc/targhooks.c b/gcc/targhooks.c

index d820618..b271116 100644 (file)
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1316,7 +1316,7 @@ default_split_reduction (machine_mode mode)
     is tried.  */
  
  void
-default_autovectorize_vector_sizes (vector_sizes *)
+default_autovectorize_vector_sizes (vector_sizes *, bool)
  {
  }
  
diff --git a/gcc/targhooks.h b/gcc/targhooks.h

index 810c2b1..229aacd 100644 (file)
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -110,7 +110,7 @@ default_builtin_support_vector_misalignment (machine_mode mode,
                                              int, bool);
  extern machine_mode default_preferred_simd_mode (scalar_mode mode);
  extern machine_mode default_split_reduction (machine_mode);
-extern void default_autovectorize_vector_sizes (vector_sizes *);
+extern void default_autovectorize_vector_sizes (vector_sizes *, bool);
  extern opt_machine_mode default_get_mask_mode (poly_uint64, poly_uint64);
  extern bool default_empty_mask_is_expensive (unsigned);
  extern void *default_init_cost (struct loop *);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 9b1e6d3..3ecff36 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2019-05-20  Jakub Jelinek  <jakub@redhat.com>
+
+       * gcc.target/i386/avx512f-simd-1.c: New test.
+
  2019-05-20  Christophe Lyon  <christophe.lyon@linaro.org>
  
         * gcc.target/aarch64/target_attr_10.c: Add quotes to expected
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c

new file mode 100644 (file)

index 0000000..235fb91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp-simd -O2 -mavx512f -masm=att" } */
+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%xmm" } } */
+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%ymm" } } */
+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%zmm" } } */
+
+#define N 1024
+int a[N];
+
+void
+f1 (void)
+{
+  int i;
+  #pragma omp simd simdlen (4)
+  for (i = 0; i < N; ++i)
+    a[i] = a[i] + 1;
+}
+
+void
+f2 (void)
+{
+  int i;
+  #pragma omp simd simdlen (8)
+  for (i = 0; i < N; ++i)
+    a[i] = a[i] + 2;
+}
+
+void
+f3 (void)
+{
+  int i;
+  #pragma omp simd simdlen (16)
+  for (i = 0; i < N; ++i)
+    a[i] = a[i] + 3;
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c

index 5776417..e1229a5 100644 (file)
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2254,7 +2254,8 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
  
    /* Autodetect first vector size we try.  */
    current_vector_size = 0;
-  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes,
+                                               loop->simdlen != 0);
    unsigned int next_size = 0;
  
    DUMP_VECT_SCOPE ("analyze_loop_nest");
@@ -2273,6 +2274,8 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
  
    unsigned n_stmts = 0;
    poly_uint64 autodetected_vector_size = 0;
+  opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
+  poly_uint64 first_vector_size = 0;
    while (1)
      {
        /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
@@ -2283,6 +2286,7 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
           if (dump_enabled_p ())
             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                              "bad loop form.\n");
+         gcc_checking_assert (first_loop_vinfo == NULL);
           return loop_vinfo;
         }
  
@@ -2296,10 +2300,27 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
         {
           LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
  
-         return loop_vinfo;
+         if (loop->simdlen
+             && maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+                          (unsigned HOST_WIDE_INT) loop->simdlen))
+           {
+             if (first_loop_vinfo == NULL)
+               {
+                 first_loop_vinfo = loop_vinfo;
+                 first_vector_size = current_vector_size;
+                 loop->aux = NULL;
+               }
+             else
+               delete loop_vinfo;
+           }
+         else
+           {
+             delete first_loop_vinfo;
+             return loop_vinfo;
+           }
         }
-
-      delete loop_vinfo;
+      else
+       delete loop_vinfo;
  
        if (next_size == 0)
         autodetected_vector_size = current_vector_size;
@@ -2308,10 +2329,31 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
           && known_eq (vector_sizes[next_size], autodetected_vector_size))
         next_size += 1;
  
-      if (fatal
-         || next_size == vector_sizes.length ()
+      if (fatal)
+       {
+         gcc_checking_assert (first_loop_vinfo == NULL);
+         return opt_loop_vec_info::propagate_failure (res);
+       }
+
+      if (next_size == vector_sizes.length ()
           || known_eq (current_vector_size, 0U))
-       return opt_loop_vec_info::propagate_failure (res);
+       {
+         if (first_loop_vinfo)
+           {
+             current_vector_size = first_vector_size;
+             loop->aux = (loop_vec_info) first_loop_vinfo;
+             if (dump_enabled_p ())
+               {
+                 dump_printf_loc (MSG_NOTE, vect_location,
+                                  "***** Choosing vector size ");
+                 dump_dec (MSG_NOTE, current_vector_size);
+                 dump_printf (MSG_NOTE, "\n");
+               }
+             return first_loop_vinfo;
+           }
+         else
+           return opt_loop_vec_info::propagate_failure (res);
+       }
  
        /* Try the next biggest vector size.  */
        current_vector_size = vector_sizes[next_size++];
@@ -8670,7 +8712,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
    if (epilogue)
      {
        auto_vector_sizes vector_sizes;
-      targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+      targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
        unsigned int next_size = 0;
  
        /* Note LOOP_VINFO_NITERS_KNOWN_P and LOOP_VINFO_INT_NITERS work
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c

index 52c7b47..2810228 100644 (file)
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2983,7 +2983,7 @@ vect_slp_bb (basic_block bb)
  
    /* Autodetect first vector size we try.  */
    current_vector_size = 0;
-  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
    unsigned int next_size = 0;
  
    gsi = gsi_start_bb (bb);
author	Jakub Jelinek <jakub@redhat.com>
	Mon, 20 May 2019 09:49:07 +0000 (11:49 +0200)
committer	Jakub Jelinek <jakub@gcc.gnu.org>
	Mon, 20 May 2019 09:49:07 +0000 (11:49 +0200)
gcc/ChangeLog		patch \| blob \| history
gcc/cfgloop.h		patch \| blob \| history
gcc/cfgloopmanip.c		patch \| blob \| history
gcc/config/aarch64/aarch64.c		patch \| blob \| history
gcc/config/arc/arc.c		patch \| blob \| history
gcc/config/arm/arm.c		patch \| blob \| history
gcc/config/i386/i386.c		patch \| blob \| history
gcc/config/mips/mips.c		patch \| blob \| history
gcc/doc/tm.texi		patch \| blob \| history
gcc/omp-expand.c		patch \| blob \| history
gcc/omp-general.c		patch \| blob \| history
gcc/omp-low.c		patch \| blob \| history
gcc/optabs-query.c		patch \| blob \| history
gcc/target.def		patch \| blob \| history
gcc/targhooks.c		patch \| blob \| history
gcc/targhooks.h		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/i386/avx512f-simd-1.c	[new file with mode: 0644]	patch \| blob
gcc/tree-vect-loop.c		patch \| blob \| history
gcc/tree-vect-slp.c		patch \| blob \| history