From 2801f23fb82a5ef51c8b460a500786797943e1e9 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Tue, 15 Feb 2022 12:11:31 +0100
Subject: [PATCH] fold, simplify-rtx: Punt on non-representable floating point
 constants [PR104522]

For IBM double double I've added in PR95450 and PR99648 verification that
when we at the tree/GIMPLE or RTL level interpret target bytes as a REAL_CST
or CONST_DOUBLE constant, we try to encode it back to target bytes and
verify it is the same.
This is because our real.c support isn't able to represent all valid values
of IBM double double which has variable precision.
In PR104522, it has been noted that we have a similar problem with the
Intel/Motorola extended XFmode formats: our internal representation isn't
able to record pseudo denormals, pseudo infinities, pseudo NaNs and unnormal
values.
So, the following patch is an attempt to extend that verification to all
floats.
Unfortunately, it wasn't that straightforward, because the
__builtin_clear_padding code needs, precisely for the XFmode long doubles,
to discover which bits are padding and does that by interpreting memory of
all 1s.  That is actually a valid supported value, a qNaN with negative
sign and all mantissa bits set, but the verification also covers the
padding bits (exactly what __builtin_clear_padding wants to figure out),
so the comparison check fails and we ICE.
The patch fixes that case by moving that verification from
native_interpret_real to its caller, so that clear_padding_type can
call native_interpret_real and avoid that extra check.

With this, the only thing that regresses in the testsuite is
+FAIL: gcc.target/i386/auto-init-4.c scan-assembler-times long\\t-16843010 5
because it decides to use a pattern that has non-zero bits in the padding
bits of the long double, so the simplify-rtx.cc change prevents folding
a SUBREG into a constant.  We emit (the testcase is -O0 but we emit worse
code at all opt levels) something like:
        movabsq $-72340172838076674, %rax
        movabsq $-72340172838076674, %rdx
        movq    %rax, -48(%rbp)
        movq    %rdx, -40(%rbp)
        fldt    -48(%rbp)
        fstpt   -32(%rbp)
instead of
        fldt    .LC2(%rip)
        fstpt   -32(%rbp)
...
.LC2:
        .long   -16843010
        .long   -16843010
        .long   65278
        .long   0
Note, neither of those sequences actually stores the padding bits, fstpt
simply doesn't touch them.
For vars with clear_padding_real_needs_padding_p types that are allocated
to memory at expansion time, I'd say it would be much better to do the
stores using integral modes rather than XFmode, i.e. emit:
        movabsq $-72340172838076674, %rax
        movq    %rax, -32(%rbp)
        movq    %rax, -24(%rbp)
directly.  That is the only way to ensure the padding bits are initialized
(or expand __builtin_clear_padding, but then you initialize separately the
value bits and padding bits).

2022-02-15  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/104522
	* fold-const.h (native_interpret_real): Declare.
	* fold-const.cc (native_interpret_real): No longer static.  Don't
	perform MODE_COMPOSITE_P verification here.
	(native_interpret_expr) <case REAL_TYPE>: But perform it here instead
	for all modes.
	* gimple-fold.cc (clear_padding_type): Call native_interpret_real
	instead of native_interpret_expr.
	* simplify-rtx.cc (simplify_immed_subreg): Perform the native_encode_rtx
	and comparison verification for all FLOAT_MODE_P modes, not just
	MODE_COMPOSITE_P.

	* gcc.dg/pr104522.c: New test.
---
 gcc/fold-const.cc               | 34 +++++++++++++++++++---------------
 gcc/fold-const.h                |  1 +
 gcc/gimple-fold.cc              |  4 ++--
 gcc/simplify-rtx.cc             |  2 +-
 gcc/testsuite/gcc.dg/pr104522.c | 14 ++++++++++++++
 5 files changed, 37 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr104522.c
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 9d99396..7b21240 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -8643,7 +8643,7 @@ native_interpret_fixed (tree type, const unsigned char *ptr, int len)
    the buffer PTR of length LEN as a REAL_CST of type TYPE.
    If the buffer cannot be interpreted, return NULL_TREE.  */
 
-static tree
+tree
 native_interpret_real (tree type, const unsigned char *ptr, int len)
 {
   scalar_float_mode mode = SCALAR_FLOAT_TYPE_MODE (type);
@@ -8694,19 +8694,7 @@ native_interpret_real (tree type, const unsigned char *ptr, int len)
     }
 
   real_from_target (&r, tmp, mode);
-  tree ret = build_real (type, r);
-  if (MODE_COMPOSITE_P (mode))
-    {
-      /* For floating point values in composite modes, punt if this folding
-	 doesn't preserve bit representation.  As the mode doesn't have fixed
-	 precision while GCC pretends it does, there could be valid values that
-	 GCC can't really represent accurately.  See PR95450.  */
-      unsigned char buf[24];
-      if (native_encode_expr (ret, buf, total_bytes, 0) != total_bytes
-	  || memcmp (ptr, buf, total_bytes) != 0)
-	ret = NULL_TREE;
-    }
-  return ret;
+  return build_real (type, r);
 }
 
 
@@ -8824,7 +8812,23 @@ native_interpret_expr (tree type, const unsigned char *ptr, int len)
       return native_interpret_int (type, ptr, len);
 
     case REAL_TYPE:
-      return native_interpret_real (type, ptr, len);
+      if (tree ret = native_interpret_real (type, ptr, len))
+	{
+	  /* For floating point values in composite modes, punt if this
+	     folding doesn't preserve bit representation.  As the mode doesn't
+	     have fixed precision while GCC pretends it does, there could be
+	     valid values that GCC can't really represent accurately.
+	     See PR95450.  Even for other modes, e.g. x86 XFmode can have some
+	     bit combinations which GCC doesn't preserve.  */
+	  unsigned char buf[24];
+	  scalar_float_mode mode = SCALAR_FLOAT_TYPE_MODE (type);
+	  int total_bytes = GET_MODE_SIZE (mode);
+	  if (native_encode_expr (ret, buf, total_bytes, 0) != total_bytes
+	      || memcmp (ptr, buf, total_bytes) != 0)
+	    return NULL_TREE;
+	  return ret;
+	}
+      return NULL_TREE;
 
     case FIXED_POINT_TYPE:
       return native_interpret_fixed (type, ptr, len);
diff --git a/gcc/fold-const.h b/gcc/fold-const.h
index f217598..926c775 100644
--- a/gcc/fold-const.h
+++ b/gcc/fold-const.h
@@ -36,6 +36,7 @@ extern int native_encode_expr (const_tree, unsigned char *, int, int off = -1);
 extern int native_encode_initializer (tree, unsigned char *, int,
 				      int off = -1, unsigned char * = nullptr);
 extern tree native_interpret_expr (tree, const unsigned char *, int);
+extern tree native_interpret_real (tree, const unsigned char *, int);
 extern bool can_native_interpret_type_p (tree);
 extern tree native_interpret_aggregate (tree, const unsigned char *, int, int);
 extern tree find_bitfield_repr_type (int, int);
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index d9b1a44..16f02c2 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -4807,10 +4807,10 @@ clear_padding_type (clear_padding_struct *buf, tree type,
 	clear_padding_flush (buf, false);
       if (clear_padding_real_needs_padding_p (type))
 	{
-	  /* Use native_interpret_expr + native_encode_expr to figure out
+	  /* Use native_interpret_real + native_encode_expr to figure out
 	     which bits are padding.  */
 	  memset (buf->buf + buf->size, ~0, sz);
-	  tree cst = native_interpret_expr (type, buf->buf + buf->size, sz);
+	  tree cst = native_interpret_real (type, buf->buf + buf->size, sz);
 	  gcc_assert (cst && TREE_CODE (cst) == REAL_CST);
 	  int len = native_encode_expr (cst, buf->buf + buf->size, sz);
 	  gcc_assert (len > 0 && (size_t) len == (size_t) sz);
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index bd176e8..85ad990 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -7302,7 +7302,7 @@ simplify_immed_subreg (fixed_size_mode outermode, rtx x,
   else if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
     return NULL_RTX;
   rtx ret = native_decode_rtx (outermode, buffer, 0);
-  if (ret && MODE_COMPOSITE_P (outermode))
+  if (ret && FLOAT_MODE_P (outermode))
     {
       auto_vec<target_unit, 128> buffer2 (buffer_bytes);
       if (!native_encode_rtx (outermode, ret, buffer2, 0, buffer_bytes))
diff --git a/gcc/testsuite/gcc.dg/pr104522.c b/gcc/testsuite/gcc.dg/pr104522.c
new file mode 100644
index 0000000..4d1d630
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr104522.c
@@ -0,0 +1,14 @@
+/* PR middle-end/104522 */
+/* { dg-do compile } */
+/* { dg-options "-O -fcompare-debug -dP" } */
+
+typedef short __attribute__((__vector_size__(16))) V;
+long double x;
+
+void
+foo (void)
+{
+  V t = { 512, 0, 0, 0, 16384 };
+  long double u = *(long double *) &t;
+  x /= u;
+}
-- 
2.7.4