re PR tree-optimization/83202 (Try joining operations on consecutive array elements...
authorRichard Biener <rguenther@suse.de>
Wed, 29 Nov 2017 14:38:06 +0000 (14:38 +0000)
committerRichard Biener <rguenth@gcc.gnu.org>
Wed, 29 Nov 2017 14:38:06 +0000 (14:38 +0000)
2017-11-29  Richard Biener  <rguenther@suse.de>

PR tree-optimization/83202
* tree-vect-slp.c (scalar_stmts_set_t): New typedef.
(bst_fail): Use it.
(vect_analyze_slp_cost_1): Add visited set, do not account SLP
nodes vectorized to the same stmts multiple times.
(vect_analyze_slp_cost): Allocate a visited set and pass it down.
(vect_analyze_slp_instance): Adjust.
(scalar_stmts_to_slp_tree_map_t): New typedef.
(vect_schedule_slp_instance): Add a map recording the SLP node
representing the vectorized stmts for a set of scalar stmts.
Avoid code-generating redundancies.
(vect_schedule_slp): Allocate map and pass it down.

* gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c: New testcase.

From-SVN: r255233

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c [new file with mode: 0644]
gcc/tree-vect-slp.c

index 2b93243..003b6b1 100644 (file)
@@ -1,3 +1,18 @@
+2017-11-29  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/83202
+       * tree-vect-slp.c (scalar_stmts_set_t): New typedef.
+       (bst_fail): Use it.
+       (vect_analyze_slp_cost_1): Add visited set, do not account SLP
+       nodes vectorized to the same stmts multiple times.
+       (vect_analyze_slp_cost): Allocate a visited set and pass it down.
+       (vect_analyze_slp_instance): Adjust.
+       (scalar_stmts_to_slp_tree_map_t): New typedef.
+       (vect_schedule_slp_instance): Add a map recording the SLP node
+       representing the vectorized stmts for a set of scalar stmts.
+       Avoid code-generating redundancies.
+       (vect_schedule_slp): Allocate map and pass it down.
+
 2017-11-29  Nathan Sidwell  <nathan@acm.org>
 
        PR c++/83187
index 3b90be1..bf1e374 100644 (file)
@@ -1,3 +1,8 @@
+2017-11-29  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/83202
+       * gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c: New testcase.
+
 2017-11-29  Nathan Sidwell  <nathan@acm.org>
 
        PR c++/83187
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c
new file mode 100644 (file)
index 0000000..bdec20f
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+void test(double data[4][2])
+{
+  for (int i = 0; i < 4; i++)
+    {
+      data[i][0] = data[i][0] * data[i][0];
+      data[i][1] = data[i][1] * data[i][1];
+    }
+}
+
+/* We should vectorize this with SLP and V2DF vectors.  */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
index bc81b3d..19f2ac4 100644 (file)
@@ -961,7 +961,8 @@ bst_traits::equal (value_type existing, value_type candidate)
   return true;
 }
 
-static hash_set <vec <gimple *>, bst_traits> *bst_fail;
+typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
+static scalar_stmts_set_t *bst_fail;
 
 static slp_tree
 vect_build_slp_tree_2 (vec_info *vinfo,
@@ -1674,7 +1675,8 @@ static void
 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         stmt_vector_for_cost *body_cost_vec,
-                        unsigned ncopies_for_cost)
+                        unsigned ncopies_for_cost,
+                        scalar_stmts_set_t* visited)
 {
   unsigned i, j;
   slp_tree child;
@@ -1682,11 +1684,18 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
   stmt_vec_info stmt_info;
   tree lhs;
 
+  /* If we already costed the exact same set of scalar stmts we're done.
+     We share the generated vector stmts for those.  */
+  if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
+    return;
+
+  visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());
+
   /* Recurse down the SLP tree.  */
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
       vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
-                              body_cost_vec, ncopies_for_cost);
+                              body_cost_vec, ncopies_for_cost, visited);
 
   /* Look at the first scalar stmt to determine the cost.  */
   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@@ -1871,9 +1880,11 @@ vect_analyze_slp_cost (slp_instance instance, void *data)
 
   prologue_cost_vec.create (10);
   body_cost_vec.create (10);
+  scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
   vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, &body_cost_vec,
-                          ncopies_for_cost);
+                          ncopies_for_cost, visited);
+  delete visited;
 
   /* Record the prologue costs, which were delayed until we were
      sure that SLP was successful.  */
@@ -2037,7 +2048,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
   /* Build the tree for the SLP instance.  */
   bool *matches = XALLOCAVEC (bool, group_size);
   unsigned npermutes = 0;
-  bst_fail = new hash_set <vec <gimple *>, bst_traits> ();
+  bst_fail = new scalar_stmts_set_t ();
   node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
                                   &max_nunits, &loads, matches, &npermutes,
                              NULL, max_tree_size);
@@ -3702,12 +3713,15 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
   return true;
 }
 
-
+typedef hash_map <vec <gimple *>, slp_tree,
+                 simple_hashmap_traits <bst_traits, slp_tree> >
+  scalar_stmts_to_slp_tree_map_t;
 
 /* Vectorize SLP instance tree in postorder.  */
 
 static bool
-vect_schedule_slp_instance (slp_tree node, slp_instance instance)
+vect_schedule_slp_instance (slp_tree node, slp_instance instance,
+                           scalar_stmts_to_slp_tree_map_t *bst_map)
 {
   gimple *stmt;
   bool grouped_store, is_store;
@@ -3721,8 +3735,19 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance)
   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
     return false;
 
+  /* See if we have already vectorized the same set of stmts and reuse their
+     vectorized stmts.  */
+  slp_tree &leader
+    = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
+  if (leader)
+    {
+      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
+      return false;
+    }
+
+  leader = node;
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    vect_schedule_slp_instance (child, instance);
+    vect_schedule_slp_instance (child, instance, bst_map);
 
   /* Push SLP node def-type to stmts.  */
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
@@ -3894,8 +3919,11 @@ vect_schedule_slp (vec_info *vinfo)
   FOR_EACH_VEC_ELT (slp_instances, i, instance)
     {
       /* Schedule the tree of INSTANCE.  */
+      scalar_stmts_to_slp_tree_map_t *bst_map
+       = new scalar_stmts_to_slp_tree_map_t ();
       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
-                                             instance);
+                                             instance, bst_map);
+      delete bst_map;
       if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                          "vectorizing stmts using SLP.\n");