2015-05-26 Richard Biener <rguenther@suse.de>
author rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 26 May 2015 09:03:53 +0000 (09:03 +0000)
committer rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 26 May 2015 09:03:53 +0000 (09:03 +0000)
* tree-vect-loop.c (vect_update_vf_for_slp): Split out from ...
(vect_analyze_loop_operations): ... here.  Remove slp parameter,
detect whether we apply SLP.  Remove call to
vect_update_slp_costs_according_to_vf.
(vect_analyze_loop_2): Call vect_update_vf_for_slp and
vect_update_slp_costs_according_to_vf from here.  Dispatch
to vect_slp_analyze_operations to analyze SLP stmts.
* tree-vect-slp.c (vect_slp_analyze_node_operations): Drop
unused bb_vec_info parameter, adjust assert.
(vect_slp_analyze_operations): Pass in the slp instance tree
instead of bb_vec_info.
(vect_slp_analyze_bb_1): Adjust call to vect_slp_analyze_operations.
* tree-vectorizer.h (vect_slp_analyze_operations): Declare.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@223670 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/tree-vect-loop.c
gcc/tree-vect-slp.c
gcc/tree-vectorizer.h

index a387c80..dab3e07 100644 (file)
@@ -1,3 +1,19 @@
+2015-05-26  Richard Biener  <rguenther@suse.de>
+
+       * tree-vect-loop.c (vect_update_vf_for_slp): Split out from ...
+       (vect_analyze_loop_operations): ... here.  Remove slp parameter,
+       detect whether we apply SLP.  Remove call to
+       vect_update_slp_costs_according_to_vf.
+       (vect_analyze_loop_2): Call vect_update_vf_for_slp and
+       vect_update_slp_costs_according_to_vf from here.  Dispatch
+       to vect_slp_analyze_operations to analyze SLP stmts.
+       * tree-vect-slp.c (vect_slp_analyze_node_operations): Drop
+       unused bb_vec_info parameter, adjust assert.
+       (vect_slp_analyze_operations): Pass in the slp instance tree
+       instead of bb_vec_info.
+       (vect_slp_analyze_bb_1): Adjust call to vect_slp_analyze_operations.
+       * tree-vectorizer.h (vect_slp_analyze_operations): Declare.
+
 2015-05-25  Alexander Monakov  <amonakov@ispras.ru>
 
        * config/i386/i386.h (enum reg_class): Move CLOBBERED_REGS prior to
index 2c983b8..89202c4 100644 (file)
@@ -1355,25 +1355,85 @@ vect_analyze_loop_form (struct loop *loop)
   return loop_vinfo;
 }
 
+/* Scan the loop stmts and, depending on whether there are any (non-)SLP
+   statements, update the vectorization factor.  */
+
+static void
+vect_update_vf_for_slp (loop_vec_info loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+  int nbbs = loop->num_nodes;
+  unsigned int vectorization_factor;
+  int i;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "=== vect_update_vf_for_slp ===\n");
+
+  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  gcc_assert (vectorization_factor != 0);
+
+  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+     vectorization factor of the loop is the unrolling factor required by
+     the SLP instances.  If that unrolling factor is 1, we say, that we
+     perform pure SLP on loop - cross iteration parallelism is not
+     exploited.  */
+  bool only_slp_in_loop = true;
+  for (i = 0; i < nbbs; i++)
+    {
+      basic_block bb = bbs[i];
+      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
+          gsi_next (&si))
+       {
+         gimple stmt = gsi_stmt (si);
+         stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+         if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+             && STMT_VINFO_RELATED_STMT (stmt_info))
+           {
+             stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+             stmt_info = vinfo_for_stmt (stmt);
+           }
+         if ((STMT_VINFO_RELEVANT_P (stmt_info)
+              || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+             && !PURE_SLP_STMT (stmt_info))
+           /* STMT needs both SLP and loop-based vectorization.  */
+           only_slp_in_loop = false;
+       }
+    }
+
+  if (only_slp_in_loop)
+    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+  else
+    vectorization_factor
+      = least_common_multiple (vectorization_factor,
+                              LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "Updating vectorization factor to %d\n",
+                    vectorization_factor);
+}
 
 /* Function vect_analyze_loop_operations.
 
    Scan the loop stmts and make sure they are all vectorizable.  */
 
 static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
   int nbbs = loop->num_nodes;
-  unsigned int vectorization_factor = 0;
+  unsigned int vectorization_factor;
   int i;
   stmt_vec_info stmt_info;
   bool need_to_vectorize = false;
   int min_profitable_iters;
   int min_scalar_loop_bound;
   unsigned int th;
-  bool only_slp_in_loop = true, ok;
+  bool ok;
   HOST_WIDE_INT max_niter;
   HOST_WIDE_INT estimated_niter;
   int min_profitable_estimate;
@@ -1382,50 +1442,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
     dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_loop_operations ===\n");
 
-  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
-  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  if (slp)
-    {
-      /* If all the stmts in the loop can be SLPed, we perform only SLP, and
-        vectorization factor of the loop is the unrolling factor required by
-        the SLP instances.  If that unrolling factor is 1, we say, that we
-        perform pure SLP on loop - cross iteration parallelism is not
-        exploited.  */
-      for (i = 0; i < nbbs; i++)
-       {
-         basic_block bb = bbs[i];
-         for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
-              gsi_next (&si))
-           {
-             gimple stmt = gsi_stmt (si);
-             stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-             if (STMT_VINFO_IN_PATTERN_P (stmt_info)
-                 && STMT_VINFO_RELATED_STMT (stmt_info))
-               {
-                 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
-                 stmt_info = vinfo_for_stmt (stmt);
-               }
-             if ((STMT_VINFO_RELEVANT_P (stmt_info)
-                  || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
-                 && !PURE_SLP_STMT (stmt_info))
-               /* STMT needs both SLP and loop-based vectorization.  */
-               only_slp_in_loop = false;
-           }
-       }
-
-      if (only_slp_in_loop)
-       vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
-      else
-       vectorization_factor = least_common_multiple (vectorization_factor,
-                               LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
-      LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_NOTE, vect_location,
-                        "Updating vectorization factor to %d\n",
-                        vectorization_factor);
-    }
-
   for (i = 0; i < nbbs; i++)
     {
       basic_block bb = bbs[i];
@@ -1540,6 +1556,11 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
           gsi_next (&si))
         {
           gimple stmt = gsi_stmt (si);
+         if (STMT_SLP_TYPE (vinfo_for_stmt (stmt)))
+           {
+             need_to_vectorize = true;
+             continue;
+           }
          if (!gimple_clobber_p (stmt)
              && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
            return false;
@@ -1563,6 +1584,9 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
       return false;
     }
 
+  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  gcc_assert (vectorization_factor != 0);
+
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
                     "vectorization_factor = %d, niters = "
@@ -1586,10 +1610,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
 
   /* Analyze cost.  Decide if worth while to vectorize.  */
 
-  /* Once VF is set, SLP costs should be updated since the number of created
-     vector stmts depends on VF.  */
-  vect_update_slp_costs_according_to_vf (loop_vinfo);
-
   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
                                      &min_profitable_estimate);
   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
@@ -1664,7 +1684,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
 static bool
 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 {
-  bool ok, slp = false;
+  bool ok;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   int min_vf = 2;
   unsigned int th;
@@ -1790,19 +1810,34 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
   ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
   if (ok)
     {
-      /* Decide which possible SLP instances to SLP.  */
-      slp = vect_make_slp_decision (loop_vinfo);
-
-      /* Find stmts that need to be both vectorized and SLPed.  */
-      vect_detect_hybrid_slp (loop_vinfo);
+      /* If there are any SLP instances mark them as pure_slp.  */
+      if (vect_make_slp_decision (loop_vinfo))
+       {
+         /* Find stmts that need to be both vectorized and SLPed.  */
+         vect_detect_hybrid_slp (loop_vinfo);
+
+         /* Update the vectorization factor based on the SLP decision.  */
+         vect_update_vf_for_slp (loop_vinfo);
+
+         /* Once VF is set, SLP costs should be updated since the number of
+            created vector stmts depends on VF.  */
+         vect_update_slp_costs_according_to_vf (loop_vinfo);
+
+         /* Analyze operations in the SLP instances.  Note this may
+            remove unsupported SLP instances which makes the above
+            SLP kind detection invalid.  */
+         unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
+         vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
+         if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
+           return false;
+       }
     }
   else
     return false;
 
-  /* Scan all the operations in the loop and make sure they are
-     vectorizable.  */
-
-  ok = vect_analyze_loop_operations (loop_vinfo, slp);
+  /* Scan all the remaining operations in the loop that are not subject
+     to SLP and make sure they are vectorizable.  */
+  ok = vect_analyze_loop_operations (loop_vinfo);
   if (!ok)
     {
       if (dump_enabled_p ())
index 1c51990..1e68020 100644 (file)
@@ -2191,7 +2191,7 @@ destroy_bb_vec_info (bb_vec_info bb_vinfo)
    the subtree. Return TRUE if the operations are supported.  */
 
 static bool
-vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
+vect_slp_analyze_node_operations (slp_tree node)
 {
   bool dummy;
   int i;
@@ -2202,17 +2202,17 @@ vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
     return true;
 
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    if (!vect_slp_analyze_node_operations (bb_vinfo, child))
+    if (!vect_slp_analyze_node_operations (child))
       return false;
 
   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
     {
       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
       gcc_assert (stmt_info);
-      gcc_assert (PURE_SLP_STMT (stmt_info));
+      gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
 
       if (!vect_analyze_stmt (stmt, &dummy, node))
-        return false;
+       return false;
     }
 
   return true;
@@ -2222,19 +2222,26 @@ vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
    operations are supported. */
 
-static bool
-vect_slp_analyze_operations (bb_vec_info bb_vinfo)
+bool
+vect_slp_analyze_operations (vec<slp_instance> slp_instances)
 {
-  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
   slp_instance instance;
   int i;
 
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "=== vect_slp_analyze_operations ===\n");
+
   for (i = 0; slp_instances.iterate (i, &instance); )
     {
-      if (!vect_slp_analyze_node_operations (bb_vinfo,
-                                             SLP_INSTANCE_TREE (instance)))
+      if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
         {
-         vect_free_slp_instance (instance);
+         dump_printf_loc (MSG_NOTE, vect_location,
+                          "removing SLP instance operations starting from: ");
+         dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
+                           SLP_TREE_SCALAR_STMTS
+                             (SLP_INSTANCE_TREE (instance))[0], 0);
+         vect_free_slp_instance (instance);
           slp_instances.ordered_remove (i);
        }
       else
@@ -2498,7 +2505,7 @@ vect_slp_analyze_bb_1 (basic_block bb)
       return NULL;
     }
 
-  if (!vect_slp_analyze_operations (bb_vinfo))
+  if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo)))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
index adde2fb..ae795a9 100644 (file)
@@ -1114,6 +1114,7 @@ extern void vect_free_slp_instance (slp_instance);
 extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
                                           gimple_stmt_iterator *, int,
                                           slp_instance, bool);
+extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances);
 extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
 extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
 extern bool vect_analyze_slp (loop_vec_info, bb_vec_info, unsigned);