re PR tree-optimization/84503 (store-merging miscompilation on powerpc64 with -O3...
authorJakub Jelinek <jakub@redhat.com>
Thu, 22 Feb 2018 08:28:42 +0000 (09:28 +0100)
committerJakub Jelinek <jakub@gcc.gnu.org>
Thu, 22 Feb 2018 08:28:42 +0000 (09:28 +0100)
PR tree-optimization/84503
* gimple-ssa-store-merging.c (merged_store_group::merge_into): Compute
width as info->bitpos + info->bitsize - start.
(merged_store_group::merge_overlapping): Simplify width computation.
(check_no_overlap): New function.
(imm_store_chain_info::try_coalesce_bswap): Compute expected
start + width and last_order of the group, fail if check_no_overlap
fails.
(imm_store_chain_info::coalesce_immediate_stores): Don't merge info
to group if check_no_overlap fails.

* gcc.dg/pr84503-1.c: New test.
* gcc.dg/pr84503-2.c: New test.

From-SVN: r257891

gcc/ChangeLog
gcc/gimple-ssa-store-merging.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/pr84503-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/pr84503-2.c [new file with mode: 0644]

index 08279e5..914d6cb 100644 (file)
@@ -1,3 +1,16 @@
+2018-02-22  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/84503
+       * gimple-ssa-store-merging.c (merged_store_group::merge_into): Compute
+       width as info->bitpos + info->bitsize - start.
+       (merged_store_group::merge_overlapping): Simplify width computation.
+       (check_no_overlap): New function.
+       (imm_store_chain_info::try_coalesce_bswap): Compute expected
+       start + width and last_order of the group, fail if check_no_overlap
+       fails.
+       (imm_store_chain_info::coalesce_immediate_stores): Don't merge info
+       to group if check_no_overlap fails.
+
 2018-02-21  Segher Boessenkool  <segher@kernel.crashing.org>
 
        * config/rs6000/altivec.md: Delete contraint arguments to
index 92ddfb5..7b56031 100644 (file)
@@ -1891,12 +1891,11 @@ merged_store_group::do_merge (store_immediate_info *info)
 void
 merged_store_group::merge_into (store_immediate_info *info)
 {
-  unsigned HOST_WIDE_INT wid = info->bitsize;
   /* Make sure we're inserting in the position we think we're inserting.  */
   gcc_assert (info->bitpos >= start + width
              && info->bitregion_start <= bitregion_end);
 
-  width += wid;
+  width = info->bitpos + info->bitsize - start;
   do_merge (info);
 }
 
@@ -1909,7 +1908,7 @@ merged_store_group::merge_overlapping (store_immediate_info *info)
 {
   /* If the store extends the size of the group, extend the width.  */
   if (info->bitpos + info->bitsize > start + width)
-    width += info->bitpos + info->bitsize - (start + width);
+    width = info->bitpos + info->bitsize - start;
 
   do_merge (info);
 }
@@ -2304,6 +2303,55 @@ gather_bswap_load_refs (vec<tree> *refs, tree val)
     }
 }
 
+/* Check if there are any stores in M_STORE_INFO after index I
+   (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
+   a potential group ending with END that have their order
+   smaller than LAST_ORDER.  RHS_CODE is the kind of store in the
+   group.  Return true if there are no such stores.
+   Consider:
+     MEM[(long long int *)p_28] = 0;
+     MEM[(long long int *)p_28 + 8B] = 0;
+     MEM[(long long int *)p_28 + 16B] = 0;
+     MEM[(long long int *)p_28 + 24B] = 0;
+     _129 = (int) _130;
+     MEM[(int *)p_28 + 8B] = _129;
+     MEM[(int *)p_28].a = -1;
+   We already have
+     MEM[(long long int *)p_28] = 0;
+     MEM[(int *)p_28].a = -1;
+   stmts in the current group and need to consider if it is safe to
+   add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
+   There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
+   store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
+   into the group and merging of those 3 stores is successful, merged
+   stmts will be emitted at the latest store from that group, i.e.
+   LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
+   The MEM[(int *)p_28 + 8B] = _129; store that originally follows
+   the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
+   so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
+   into the group.  That way it will be its own store group and will
+   not be touched.  If RHS_CODE is INTEGER_CST and there are overlapping
+   INTEGER_CST stores, those are mergeable using merge_overlapping,
+   so don't return false for those.  */
+
+static bool
+check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
+                 enum tree_code rhs_code, unsigned int last_order,
+                 unsigned HOST_WIDE_INT end)
+{
+  unsigned int len = m_store_info.length ();
+  for (++i; i < len; ++i)
+    {
+      store_immediate_info *info = m_store_info[i];
+      if (info->bitpos >= end)
+       break;
+      if (info->order < last_order
+         && (rhs_code != INTEGER_CST || info->rhs_code != INTEGER_CST))
+       return false;
+    }
+  return true;
+}
+
 /* Return true if m_store_info[first] and at least one following store
    form a group which store try_size bitsize value which is byte swapped
    from a memory load or some value, or identity from some value.
@@ -2375,6 +2423,7 @@ imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
   unsigned int last_order = merged_store->last_order;
   gimple *first_stmt = merged_store->first_stmt;
   gimple *last_stmt = merged_store->last_stmt;
+  unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
   store_immediate_info *infof = m_store_info[first];
 
   for (unsigned int i = first; i <= last; ++i)
@@ -2413,25 +2462,23 @@ imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
        }
       else
        {
-         if (n.base_addr)
+         if (n.base_addr && n.vuse != this_n.vuse)
            {
-             if (n.vuse != this_n.vuse)
-               {
-                 if (vuse_store == 0)
-                   return false;
-                 vuse_store = 1;
-               }
-             if (info->order > last_order)
-               {
-                 last_order = info->order;
-                 last_stmt = info->stmt;
-               }
-             else if (info->order < first_order)
-               {
-                 first_order = info->order;
-                 first_stmt = info->stmt;
-               }
+             if (vuse_store == 0)
+               return false;
+             vuse_store = 1;
            }
+         if (info->order > last_order)
+           {
+             last_order = info->order;
+             last_stmt = info->stmt;
+           }
+         else if (info->order < first_order)
+           {
+             first_order = info->order;
+             first_stmt = info->stmt;
+           }
+         end = MAX (end, info->bitpos + info->bitsize);
 
          ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
                                             &this_n, &n);
@@ -2452,6 +2499,9 @@ imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
   if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
     return false;
 
+  if (!check_no_overlap (m_store_info, last, LROTATE_EXPR, last_order, end))
+    return false;
+
   /* Don't handle memory copy this way if normal non-bswap processing
      would handle it too.  */
   if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
@@ -2633,7 +2683,13 @@ imm_store_chain_info::coalesce_immediate_stores ()
               : !info->ops[0].base_addr)
              && (infof->ops[1].base_addr
                  ? compatible_load_p (merged_store, info, base_addr, 1)
-                 : !info->ops[1].base_addr))
+                 : !info->ops[1].base_addr)
+             && check_no_overlap (m_store_info, i, info->rhs_code,
+                                  MAX (merged_store->last_order,
+                                       info->order),
+                                  MAX (merged_store->start
+                                       + merged_store->width,
+                                       info->bitpos + info->bitsize)))
            {
              merged_store->merge_into (info);
              continue;
index 073acf5..19b85bb 100644 (file)
@@ -1,3 +1,9 @@
+2018-02-22  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/84503
+       * gcc.dg/pr84503-1.c: New test.
+       * gcc.dg/pr84503-2.c: New test.
+
 2018-02-21  Jakub Jelinek  <jakub@redhat.com>
 
        PR tree-optimization/84478
diff --git a/gcc/testsuite/gcc.dg/pr84503-1.c b/gcc/testsuite/gcc.dg/pr84503-1.c
new file mode 100644 (file)
index 0000000..03fb2fb
--- /dev/null
@@ -0,0 +1,68 @@
+/* PR tree-optimization/84503 */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+typedef __SIZE_TYPE__ size_t;
+typedef __UINTPTR_TYPE__ uintptr_t;
+
+struct S { int a; unsigned short b; int c, d, e; long f, g, h; int i, j; };
+static struct S *k;
+static size_t l = 0;
+int m;
+
+static int
+bar (void)
+{
+  unsigned i;
+  int j;
+  if (k[0].c == 0)
+    {
+      ++m;
+      size_t n = l * 2;
+      struct S *o;
+      o = (struct S *) __builtin_realloc (k, sizeof (struct S) * n);
+      if (!o)
+       __builtin_exit (0);
+      k = o;
+      for (i = l; i < n; i++)
+       {
+         void *p = (void *) &k[i];
+         int q = 0;
+         size_t r = sizeof (struct S);
+         if ((((uintptr_t) p) % __alignof__ (long)) == 0
+             && r % sizeof (long) == 0)
+           {
+             long __attribute__ ((may_alias)) *s = (long *) p;
+             long *t = (long *) ((char *) s + r);
+             while (s < t)
+               *s++ = 0;
+           }
+         else
+           __builtin_memset (p, q, r);
+         k[i].c = i + 1;
+         k[i].a = -1;
+       }
+      k[n - 1].c = 0;
+      k[0].c = l;
+      l = n;
+    }
+  j = k[0].c;
+  k[0].c = k[j].c;
+  return j;
+}
+
+int
+main ()
+{
+  k = (struct S *) __builtin_malloc (sizeof (struct S));
+  if (!k)
+    __builtin_exit (0);
+  __builtin_memset (k, '\0', sizeof (struct S));
+  k->a = -1;
+  l = 1;
+  for (int i = 0; i < 15; ++i)
+    bar ();
+  if (m != 4)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr84503-2.c b/gcc/testsuite/gcc.dg/pr84503-2.c
new file mode 100644 (file)
index 0000000..76701f0
--- /dev/null
@@ -0,0 +1,5 @@
+/* PR tree-optimization/84503 */
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-tree-vectorize -fno-ivopts" } */
+
+#include "pr84503-1.c"