sccvn: Handle bitfields in vn_reference_lookup_3 [PR93582]
authorJakub Jelinek <jakub@redhat.com>
Thu, 13 Feb 2020 09:04:11 +0000 (10:04 +0100)
committerJakub Jelinek <jakub@redhat.com>
Thu, 13 Feb 2020 09:04:11 +0000 (10:04 +0100)
The following patch is first step towards fixing PR93582.
vn_reference_lookup_3 right now punts on anything that isn't byte aligned,
so to be able to lookup a constant bitfield store, one needs to use
the exact same COMPONENT_REF, otherwise it isn't found.

This patch lifts up that that restriction if the bits to be loaded are
covered by a single store of a constant (keeps the restriction so far
for the multiple store case, can tweak that incrementally, but I think
for bisection etc. it is worth to do it one step at a time).

2020-02-13  Jakub Jelinek  <jakub@redhat.com>

PR tree-optimization/93582
* fold-const.h (shift_bytes_in_array_left,
shift_bytes_in_array_right): Declare.
* fold-const.c (shift_bytes_in_array_left,
shift_bytes_in_array_right): New function, moved from
gimple-ssa-store-merging.c, no longer static.
* gimple-ssa-store-merging.c (shift_bytes_in_array): Move
to gimple-ssa-store-merging.c and rename to shift_bytes_in_array_left.
(shift_bytes_in_array_right): Move to gimple-ssa-store-merging.c.
(encode_tree_to_bitpos): Use shift_bytes_in_array_left instead of
shift_bytes_in_array.
(verify_shift_bytes_in_array): Rename to ...
(verify_shift_bytes_in_array_left): ... this.  Use
shift_bytes_in_array_left instead of shift_bytes_in_array.
(store_merging_c_tests): Call verify_shift_bytes_in_array_left
instead of verify_shift_bytes_in_array.
* tree-ssa-sccvn.c (vn_reference_lookup_3): For native_encode_expr
/ native_interpret_expr where the store covers all needed bits,
punt on PDP-endian, otherwise allow all involved offsets and sizes
not to be byte-aligned.

* gcc.dg/tree-ssa/pr93582-1.c: New test.
* gcc.dg/tree-ssa/pr93582-2.c: New test.
* gcc.dg/tree-ssa/pr93582-3.c: New test.

gcc/ChangeLog
gcc/fold-const.c
gcc/fold-const.h
gcc/gimple-ssa-store-merging.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c [new file with mode: 0644]
gcc/tree-ssa-sccvn.c

index 252e820..36a8956 100644 (file)
@@ -1,5 +1,26 @@
 2020-02-13  Jakub Jelinek  <jakub@redhat.com>
 
+       PR tree-optimization/93582
+       * fold-const.h (shift_bytes_in_array_left,
+       shift_bytes_in_array_right): Declare.
+       * fold-const.c (shift_bytes_in_array_left,
+       shift_bytes_in_array_right): New function, moved from
+       gimple-ssa-store-merging.c, no longer static.
+       * gimple-ssa-store-merging.c (shift_bytes_in_array): Move
+       to gimple-ssa-store-merging.c and rename to shift_bytes_in_array_left.
+       (shift_bytes_in_array_right): Move to gimple-ssa-store-merging.c.
+       (encode_tree_to_bitpos): Use shift_bytes_in_array_left instead of
+       shift_bytes_in_array.
+       (verify_shift_bytes_in_array): Rename to ...
+       (verify_shift_bytes_in_array_left): ... this.  Use
+       shift_bytes_in_array_left instead of shift_bytes_in_array.
+       (store_merging_c_tests): Call verify_shift_bytes_in_array_left
+       instead of verify_shift_bytes_in_array.
+       * tree-ssa-sccvn.c (vn_reference_lookup_3): For native_encode_expr
+       / native_interpret_expr where the store covers all needed bits,
+       punt on PDP-endian, otherwise allow all involved offsets and sizes
+       not to be byte-aligned.
+
        PR target/93673
        * config/i386/sse.md (k<code><mode>): Drop mode from last operand and
        use const_0_to_255_operand predicate instead of immediate_operand.
index aefa916..71a1d3e 100644 (file)
@@ -8354,6 +8354,70 @@ can_native_interpret_type_p (tree type)
     }
 }
 
+/* Routines for manipulation of native_encode_expr encoded data if the encoded
+   or extracted constant positions and/or sizes aren't byte aligned.  */
+
+/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
+   bits between adjacent elements.  AMNT should be within
+   [0, BITS_PER_UNIT).
+   Example, AMNT = 2:
+   00011111|11100000 << 2 = 01111111|10000000
+   PTR[1]  | PTR[0]         PTR[1]  | PTR[0].  */
+
+void
+shift_bytes_in_array_left (unsigned char *ptr, unsigned int sz,
+                          unsigned int amnt)
+{
+  if (amnt == 0)
+    return;
+
+  unsigned char carry_over = 0U;
+  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
+  unsigned char clear_mask = (~0U) << amnt;
+
+  for (unsigned int i = 0; i < sz; i++)
+    {
+      unsigned prev_carry_over = carry_over;
+      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
+
+      ptr[i] <<= amnt;
+      if (i != 0)
+       {
+         ptr[i] &= clear_mask;
+         ptr[i] |= prev_carry_over;
+       }
+    }
+}
+
+/* Like shift_bytes_in_array_left but for big-endian.
+   Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
+   bits between adjacent elements.  AMNT should be within
+   [0, BITS_PER_UNIT).
+   Example, AMNT = 2:
+   00011111|11100000 >> 2 = 00000111|11111000
+   PTR[0]  | PTR[1]         PTR[0]  | PTR[1].  */
+
+void
+shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
+                           unsigned int amnt)
+{
+  if (amnt == 0)
+    return;
+
+  unsigned char carry_over = 0U;
+  unsigned char carry_mask = ~(~0U << amnt);
+
+  for (unsigned int i = 0; i < sz; i++)
+    {
+      unsigned prev_carry_over = carry_over;
+      carry_over = ptr[i] & carry_mask;
+
+      carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
+      ptr[i] >>= amnt;
+      ptr[i] |= prev_carry_over;
+    }
+}
+
 /* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating
    directly on the VECTOR_CST encoding, in a way that works for variable-
    length vectors.  Return the resulting VECTOR_CST on success or null
index 7ac792f..0f788a4 100644 (file)
@@ -30,6 +30,10 @@ extern int native_encode_initializer (tree, unsigned char *, int,
                                      int off = -1);
 extern tree native_interpret_expr (tree, const unsigned char *, int);
 extern bool can_native_interpret_type_p (tree);
+extern void shift_bytes_in_array_left (unsigned char *, unsigned int,
+                                      unsigned int);
+extern void shift_bytes_in_array_right (unsigned char *, unsigned int,
+                                       unsigned int);
 
 /* Fold constants as much as possible in an expression.
    Returns the simplified expression.
index 8371323..4bcafef 100644 (file)
@@ -1475,66 +1475,6 @@ dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
   fprintf (fd, "\n");
 }
 
-/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
-   bits between adjacent elements.  AMNT should be within
-   [0, BITS_PER_UNIT).
-   Example, AMNT = 2:
-   00011111|11100000 << 2 = 01111111|10000000
-   PTR[1]  | PTR[0]         PTR[1]  | PTR[0].  */
-
-static void
-shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
-{
-  if (amnt == 0)
-    return;
-
-  unsigned char carry_over = 0U;
-  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
-  unsigned char clear_mask = (~0U) << amnt;
-
-  for (unsigned int i = 0; i < sz; i++)
-    {
-      unsigned prev_carry_over = carry_over;
-      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
-
-      ptr[i] <<= amnt;
-      if (i != 0)
-       {
-         ptr[i] &= clear_mask;
-         ptr[i] |= prev_carry_over;
-       }
-    }
-}
-
-/* Like shift_bytes_in_array but for big-endian.
-   Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
-   bits between adjacent elements.  AMNT should be within
-   [0, BITS_PER_UNIT).
-   Example, AMNT = 2:
-   00011111|11100000 >> 2 = 00000111|11111000
-   PTR[0]  | PTR[1]         PTR[0]  | PTR[1].  */
-
-static void
-shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
-                           unsigned int amnt)
-{
-  if (amnt == 0)
-    return;
-
-  unsigned char carry_over = 0U;
-  unsigned char carry_mask = ~(~0U << amnt);
-
-  for (unsigned int i = 0; i < sz; i++)
-    {
-      unsigned prev_carry_over = carry_over;
-      carry_over = ptr[i] & carry_mask;
-
-      carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
-      ptr[i] >>= amnt;
-      ptr[i] |= prev_carry_over;
-    }
-}
-
 /* Clear out LEN bits starting from bit START in the byte array
    PTR.  This clears the bits to the *right* from START.
    START must be within [0, BITS_PER_UNIT) and counts starting from
@@ -1793,7 +1733,7 @@ encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
   /* Create the shifted version of EXPR.  */
   if (!BYTES_BIG_ENDIAN)
     {
-      shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
+      shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
       if (shift_amnt == 0)
        byte_size--;
     }
@@ -5092,11 +5032,11 @@ verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
     }
 }
 
-/* Test shift_bytes_in_array and that it carries bits across between
+/* Test shift_bytes_in_array_left and that it carries bits across between
    bytes correctly.  */
 
 static void
-verify_shift_bytes_in_array (void)
+verify_shift_bytes_in_array_left (void)
 {
    /* byte 1   | byte 0
       00011111 | 11100000.  */
@@ -5105,13 +5045,13 @@ verify_shift_bytes_in_array (void)
   memcpy (in, orig, sizeof orig);
 
   unsigned char expected[2] = { 0x80, 0x7f };
-  shift_bytes_in_array (in, sizeof (in), 2);
+  shift_bytes_in_array_left (in, sizeof (in), 2);
   verify_array_eq (in, expected, sizeof (in));
 
   memcpy (in, orig, sizeof orig);
   memcpy (expected, orig, sizeof orig);
   /* Check that shifting by zero doesn't change anything.  */
-  shift_bytes_in_array (in, sizeof (in), 0);
+  shift_bytes_in_array_left (in, sizeof (in), 0);
   verify_array_eq (in, expected, sizeof (in));
 
 }
@@ -5196,7 +5136,7 @@ verify_clear_bit_region_be (void)
 void
 store_merging_c_tests (void)
 {
-  verify_shift_bytes_in_array ();
+  verify_shift_bytes_in_array_left ();
   verify_shift_bytes_in_array_right ();
   verify_clear_bit_region ();
   verify_clear_bit_region_be ();
index 63d3ff5..2541353 100644 (file)
@@ -1,3 +1,10 @@
+2020-02-13  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/93582
+       * gcc.dg/tree-ssa/pr93582-1.c: New test.
+       * gcc.dg/tree-ssa/pr93582-2.c: New test.
+       * gcc.dg/tree-ssa/pr93582-3.c: New test.
+
 2020-02-13  Richard Biener  <rguenther@suse.de>
 
        PR testsuite/93717
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c
new file mode 100644 (file)
index 0000000..a9f0e34
--- /dev/null
@@ -0,0 +1,17 @@
+/* PR tree-optimization/93582 */
+/* { dg-do compile { target int32 } } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */
+
+union U {
+  struct S { int a : 1, b : 4, c : 27; } s;
+  struct T { int d : 2; int e : 2; int f : 28; } t;
+};
+
+int
+foo (void)
+{
+  union U u;
+  u.s.b = 10;
+  return u.t.e;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c
new file mode 100644 (file)
index 0000000..76d03ec
--- /dev/null
@@ -0,0 +1,17 @@
+/* PR tree-optimization/93582 */
+/* { dg-do compile { target int32 } } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+/* { dg-final { scan-tree-dump "return 593;" "fre1" } } */
+
+union U {
+  struct S { int a : 1, b : 14, c : 17; } s;
+  struct T { int d : 2; int e : 12; int f : 18; } t;
+};
+
+int
+foo (void)
+{
+  union U u;
+  u.s.b = -7005;
+  return u.t.e;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c
new file mode 100644 (file)
index 0000000..84f54fc
--- /dev/null
@@ -0,0 +1,18 @@
+/* PR tree-optimization/93582 */
+/* { dg-do compile { target int32 } } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+/* { dg-final { scan-tree-dump "return 1;" "fre1" { target be } } } */
+/* { dg-final { scan-tree-dump "return 2;" "fre1" { target le } } } */
+
+union U {
+  struct S { int a : 1, b : 14, c : 17; } s;
+  struct T { int d : 10; int e : 4; int f : 18; } t;
+};
+
+int
+foo (void)
+{
+  union U u;
+  u.s.b = -7005;
+  return u.t.e;
+}
index 33cd12b..30518ab 100644 (file)
@@ -2586,13 +2586,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
           && is_gimple_reg_type (vr->type)
           && !contains_storage_order_barrier_p (vr->operands)
           && gimple_assign_single_p (def_stmt)
-          && CHAR_BIT == 8 && BITS_PER_UNIT == 8
+          && CHAR_BIT == 8
+          && BITS_PER_UNIT == 8
+          && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
           /* native_encode and native_decode operate on arrays of bytes
              and so fundamentally need a compile-time size and offset.  */
           && maxsize.is_constant (&maxsizei)
-          && maxsizei % BITS_PER_UNIT == 0
           && offset.is_constant (&offseti)
-          && offseti % BITS_PER_UNIT == 0
           && (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt))
               || (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
                   && is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt))))))
@@ -2617,8 +2617,6 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
          && !reverse
          && !storage_order_barrier_p (lhs)
          && known_eq (maxsize2, size2)
-         && multiple_p (size2, BITS_PER_UNIT)
-         && multiple_p (offset2, BITS_PER_UNIT)
          && adjust_offsets_for_equal_base_address (base, &offset,
                                                    base2, &offset2)
          && offset.is_constant (&offseti)
@@ -2629,37 +2627,80 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
              && known_subrange_p (offseti, maxsizei, offset2, size2))
            {
              /* We support up to 512-bit values (for V8DFmode).  */
-             unsigned char buffer[64];
+             unsigned char buffer[65];
              int len;
 
              tree rhs = gimple_assign_rhs1 (def_stmt);
              if (TREE_CODE (rhs) == SSA_NAME)
                rhs = SSA_VAL (rhs);
-             unsigned pad = 0;
-             if (BYTES_BIG_ENDIAN
-                 && is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
-               {
-                 /* On big-endian the padding is at the 'front' so
-                    just skip the initial bytes.  */
-                 fixed_size_mode mode
-                   = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (rhs)));
-                 pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT;
-               }
              len = native_encode_expr (rhs,
-                                       buffer, sizeof (buffer),
-                                       ((offseti - offset2i) / BITS_PER_UNIT
-                                        + pad));
+                                       buffer, sizeof (buffer) - 1,
+                                       (offseti - offset2i) / BITS_PER_UNIT);
              if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
                {
                  tree type = vr->type;
+                 unsigned char *buf = buffer;
+                 unsigned int amnt = 0;
                  /* Make sure to interpret in a type that has a range
                     covering the whole access size.  */
                  if (INTEGRAL_TYPE_P (vr->type)
                      && maxsizei != TYPE_PRECISION (vr->type))
                    type = build_nonstandard_integer_type (maxsizei,
                                                           TYPE_UNSIGNED (type));
-                 tree val = native_interpret_expr (type, buffer,
-                                                   maxsizei / BITS_PER_UNIT);
+                 if (BYTES_BIG_ENDIAN)
+                   {
+                     /* For big-endian native_encode_expr stored the rhs
+                        such that the LSB of it is the LSB of buffer[len - 1].
+                        That bit is stored into memory at position
+                        offset2 + size2 - 1, i.e. in byte
+                        base + (offset2 + size2 - 1) / BITS_PER_UNIT.
+                        E.g. for offset2 1 and size2 14, rhs -1 and memory
+                        previously cleared that is:
+                        0        1
+                        01111111|11111110
+                        Now, if we want to extract offset 2 and size 12 from
+                        it using native_interpret_expr (which actually works
+                        for integral bitfield types in terms of byte size of
+                        the mode), the native_encode_expr stored the value
+                        into buffer as
+                        XX111111|11111111
+                        and returned len 2 (the X bits are outside of
+                        precision).
+                        Let sz be maxsize / BITS_PER_UNIT if not extracting
+                        a bitfield, and GET_MODE_SIZE otherwise.
+                        We need to align the LSB of the value we want to
+                        extract as the LSB of buf[sz - 1].
+                        The LSB from memory we need to read is at position
+                        offset + maxsize - 1.  */
+                     HOST_WIDE_INT sz = maxsizei / BITS_PER_UNIT;
+                     if (INTEGRAL_TYPE_P (type))
+                       sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
+                     amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
+                             - offseti - maxsizei) % BITS_PER_UNIT;
+                     if (amnt)
+                       shift_bytes_in_array_right (buffer, len, amnt);
+                     amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
+                             - offseti - maxsizei - amnt) / BITS_PER_UNIT;
+                     if ((unsigned HOST_WIDE_INT) sz + amnt > (unsigned) len)
+                       len = 0;
+                     else
+                       {
+                         buf = buffer + len - sz - amnt;
+                         len -= (buf - buffer);
+                       }
+                   }
+                 else
+                   {
+                     amnt = ((unsigned HOST_WIDE_INT) offset2i
+                             - offseti) % BITS_PER_UNIT;
+                     if (amnt)
+                       {
+                         buffer[len] = 0;
+                         shift_bytes_in_array_left (buffer, len + 1, amnt);
+                         buf = buffer + 1;
+                       }
+                   }
+                 tree val = native_interpret_expr (type, buf, len);
                  /* If we chop off bits because the types precision doesn't
                     match the memory access size this is ok when optimizing
                     reads but not when called from the DSE code during
@@ -2677,7 +2718,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
                    return data->finish (get_alias_set (lhs), val);
                }
            }
-         else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i))
+         else if (ranges_known_overlap_p (offseti, maxsizei, offset2i,
+                                          size2i)
+                  && maxsizei % BITS_PER_UNIT == 0
+                  && offseti % BITS_PER_UNIT == 0
+                  && size2i % BITS_PER_UNIT == 0
+                  && offset2i % BITS_PER_UNIT == 0)
            {
              pd_data pd;
              tree rhs = gimple_assign_rhs1 (def_stmt);