{
scalar_float_mode mode = mode_iter.require ();
const char *name = GET_MODE_NAME (mode);
+ const size_t name_len = strlen (name);
+ char float_h_prefix[16] = "";
char *macro_name
- = (char *) alloca (strlen (name)
- + sizeof ("__LIBGCC__MANT_DIG__"));
+ = XALLOCAVEC (char, name_len + sizeof ("__LIBGCC__MANT_DIG__"));
sprintf (macro_name, "__LIBGCC_%s_MANT_DIG__", name);
builtin_define_with_int_value (macro_name,
REAL_MODE_FORMAT (mode)->p);
if (!targetm.scalar_mode_supported_p (mode)
|| !targetm.libgcc_floating_mode_supported_p (mode))
continue;
- macro_name = (char *) alloca (strlen (name)
- + sizeof ("__LIBGCC_HAS__MODE__"));
+ macro_name = XALLOCAVEC (char, name_len
+ + sizeof ("__LIBGCC_HAS__MODE__"));
sprintf (macro_name, "__LIBGCC_HAS_%s_MODE__", name);
cpp_define (pfile, macro_name);
- macro_name = (char *) alloca (strlen (name)
- + sizeof ("__LIBGCC__FUNC_EXT__"));
+ macro_name = XALLOCAVEC (char, name_len
+ + sizeof ("__LIBGCC__FUNC_EXT__"));
sprintf (macro_name, "__LIBGCC_%s_FUNC_EXT__", name);
char suffix[20] = "";
if (mode == TYPE_MODE (double_type_node))
- ; /* Empty suffix correct. */
+ {
+ /* Empty suffix correct. */
+ memcpy (float_h_prefix, "DBL", 4);
+ }
else if (mode == TYPE_MODE (float_type_node))
- suffix[0] = 'f';
+ {
+ suffix[0] = 'f';
+ memcpy (float_h_prefix, "FLT", 4);
+ }
else if (mode == TYPE_MODE (long_double_type_node))
- suffix[0] = 'l';
+ {
+ suffix[0] = 'l';
+ memcpy (float_h_prefix, "LDBL", 5);
+ }
else
{
bool found_suffix = false;
sprintf (suffix, "f%d%s", floatn_nx_types[i].n,
floatn_nx_types[i].extended ? "x" : "");
found_suffix = true;
+ sprintf (float_h_prefix, "FLT%d%s", floatn_nx_types[i].n,
+ floatn_nx_types[i].extended ? "X" : "");
break;
}
gcc_assert (found_suffix);
default:
gcc_unreachable ();
}
- macro_name = (char *) alloca (strlen (name)
- + sizeof ("__LIBGCC__EXCESS_"
- "PRECISION__"));
+ macro_name = XALLOCAVEC (char, name_len
+ + sizeof ("__LIBGCC__EXCESS_PRECISION__"));
sprintf (macro_name, "__LIBGCC_%s_EXCESS_PRECISION__", name);
builtin_define_with_int_value (macro_name, excess_precision);
+
+ char val_name[64];
+
+ macro_name = XALLOCAVEC (char, name_len
+ + sizeof ("__LIBGCC__EPSILON__"));
+ sprintf (macro_name, "__LIBGCC_%s_EPSILON__", name);
+ sprintf (val_name, "__%s_EPSILON__", float_h_prefix);
+ builtin_define_with_value (macro_name, val_name, 0);
+
+ macro_name = XALLOCAVEC (char, name_len + sizeof ("__LIBGCC__MAX__"));
+ sprintf (macro_name, "__LIBGCC_%s_MAX__", name);
+ sprintf (val_name, "__%s_MAX__", float_h_prefix);
+ builtin_define_with_value (macro_name, val_name, 0);
+
+ macro_name = XALLOCAVEC (char, name_len + sizeof ("__LIBGCC__MIN__"));
+ sprintf (macro_name, "__LIBGCC_%s_MIN__", name);
+ sprintf (val_name, "__%s_MIN__", float_h_prefix);
+ builtin_define_with_value (macro_name, val_name, 0);
+
+#ifdef HAVE_adddf3
+ builtin_define_with_int_value ("__LIBGCC_HAVE_HWDBL__",
+ HAVE_adddf3);
+#endif
}
/* For libgcc crtstuff.c and libgcc2.c. */
--- /dev/null
+/*
+ Program to test complex divide for correct results on selected values.
+ Checking known failure points.
+*/
+
+#include <float.h>
+
+extern void abort (void);
+extern void exit (int);
+
+extern int ilogb (double);
+int match (double _Complex, double _Complex);
+
+#define SMALL DBL_MIN
+#define MAXBIT DBL_MANT_DIG
+#define ERRLIM 6
+
+/*
+  Compare c (computed value) with z (expected value).
+  Return 1 if within allowed error range.  Return 0 if not.
+*/
+int match (double _Complex c, double _Complex z)
+{
+  double rz, iz, rc, ic;
+  double rerr, ierr, rmax;
+  int biterr;
+  /* Split expected and computed values into components.  */
+  rz = __real__ z;
+  iz = __imag__ z;
+  rc = __real__ c;
+  ic = __imag__ c;
+
+  /* Relative error of the real part.  Use the absolute error when the
+     expected value is exactly zero, and divide by SMALL (DBL_MIN)
+     rather than a subnormal expected value so the ratio stays
+     meaningful.  */
+  if (__builtin_fabs (rz) > SMALL)
+    {
+      rerr = __builtin_fabs (rz - rc) / __builtin_fabs (rz);
+    }
+  else if (__builtin_fabs (rz) == 0.0)
+    {
+      rerr = __builtin_fabs (rc);
+    }
+  else
+    {
+      rerr = __builtin_fabs (rz - rc) / SMALL;
+    }
+
+  /* Same error measure for the imaginary part.  */
+  if (__builtin_fabs (iz) > SMALL)
+    {
+      ierr = __builtin_fabs (iz - ic) / __builtin_fabs (iz);
+    }
+  else if (__builtin_fabs (iz) == 0.0)
+    {
+      ierr = __builtin_fabs (ic);
+    }
+  else
+    {
+      ierr = __builtin_fabs (iz - ic) / SMALL;
+    }
+  /* Convert the worse of the two relative errors into a count of
+     incorrect trailing mantissa bits: ilogb (rmax) is the (negative)
+     magnitude of the error, offset by MAXBIT (DBL_MANT_DIG) + 1.  */
+  rmax = __builtin_fmax(rerr, ierr);
+  biterr = 0;
+  if ( rmax != 0.0)
+    {
+      biterr = ilogb (rmax) + MAXBIT + 1;
+    }
+
+  /* Accept (return 1) only when fewer than ERRLIM bits are in error.  */
+  if (biterr >= ERRLIM)
+    return 0;
+  else
+    return 1;
+}
+
+
+/* Run the complex-divide checker over four operand sets that are known
+   failure points (extreme exponents) for the naive divide algorithm.
+   Aborts on any mismatch, exits 0 on success.  */
+int main (int argc, char** argv)
+{
+  double _Complex a,b,c,z;
+  /* Per-case operands: x + iy is divided by (yr + i*yi); z is the
+     expected quotient.  */
+  double xr[4], xi[4], yr[4], yi[4], zr[4], zi[4];
+  double cr, ci;
+  int i;
+  int ok = 1;
+  xr[0] = -0x1.16e7fad79e45ep+651;
+  xi[0] = -0x1.f7f75b94c6c6ap-860;
+  yr[0] = -0x1.2f40d8ff7e55ep+245;
+  yi[0] = -0x0.0000000004ebcp-1022;
+  zr[0] = 0x1.d6e4b0e282869p+405;
+  zi[0] = -0x1.e9095e311e706p-900;
+
+  xr[1] = -0x1.21ff587f953d3p-310;
+  xi[1] = -0x1.5a526dcc59960p+837;
+  yr[1] = 0x1.b88b8b552eaadp+735;
+  yi[1] = -0x1.873e2d6544d92p-327;
+  zr[1] = 0x1.65734a88b2de0p-961;
+  zi[1] = -0x1.927e85b8b5770p+101;
+
+  xr[2] = 0x1.4612e41aa8080p-846;
+  xi[2] = -0x0.0000000613e07p-1022;
+  yr[2] = 0x1.df9cd0d58caafp-820;
+  yi[2] = -0x1.e47051a9036dbp-584;
+  zr[2] = 0x1.9b194f3fffa32p-469;
+  zi[2] = 0x1.58a00ab740a6bp-263;
+
+  xr[3] = 0x1.cb27eece7c585p-355;
+  xi[3] = 0x0.000000223b8a8p-1022;
+  yr[3] = -0x1.74e7ed2b9189fp-22;
+  yi[3] = 0x1.3d80439e9a119p-731;
+  zr[3] = -0x1.3b35ed806ae5ap-333;
+  zi[3] = -0x0.05e01bcbfd9f6p-1022;
+
+
+  for (i = 0; i < 4; i++)
+    {
+      __real__ a = xr[i];
+      __imag__ a = xi[i];
+      __real__ b = yr[i];
+      __imag__ b = yi[i];
+      __real__ z = zr[i];
+      __imag__ z = zi[i];
+      c = a / b;
+      /* cr/ci capture the computed components; they are not read
+         afterwards -- apparently kept as a debugging aid.  */
+      cr = __real__ c;
+      ci = __imag__ c;
+
+      if (!match (c,z)){
+        ok = 0;
+      }
+    }
+  if (!ok)
+    abort ();
+  exit (0);
+}
--- /dev/null
+/*
+ Program to test complex divide for correct results on selected values.
+ Checking known failure points.
+*/
+
+#include <float.h>
+
+extern void abort (void);
+extern void exit (int);
+
+extern int ilogbf (float);
+int match (float _Complex, float _Complex);
+
+#define SMALL FLT_MIN
+#define MAXBIT FLT_MANT_DIG
+#define ERRLIM 6
+
+/*
+  Compare c (computed value) with z (expected value).
+  Return 1 if within allowed error range.  Return 0 if not.
+*/
+int match (float _Complex c, float _Complex z)
+{
+  float rz, iz, rc, ic;
+  float rerr, ierr, rmax;
+  int biterr;
+  /* Split expected and computed values into components.  */
+  rz = __real__ z;
+  iz = __imag__ z;
+  rc = __real__ c;
+  ic = __imag__ c;
+
+  /* Relative error of the real part.  Use the absolute error when the
+     expected value is exactly zero, and divide by SMALL (FLT_MIN)
+     rather than a subnormal expected value so the ratio stays
+     meaningful.  */
+  if (__builtin_fabsf (rz) > SMALL)
+    {
+      rerr = __builtin_fabsf (rz - rc) / __builtin_fabsf (rz);
+    }
+  else if (__builtin_fabsf (rz) == 0.0)
+    {
+      rerr = __builtin_fabsf (rc);
+    }
+  else
+    {
+      rerr = __builtin_fabsf (rz - rc) / SMALL;
+    }
+
+  /* Same error measure for the imaginary part.  */
+  if (__builtin_fabsf (iz) > SMALL)
+    {
+      ierr = __builtin_fabsf (iz - ic) / __builtin_fabsf (iz);
+    }
+  else if (__builtin_fabsf (iz) == 0.0)
+    {
+      ierr = __builtin_fabsf (ic);
+    }
+  else
+    {
+      ierr = __builtin_fabsf (iz - ic) / SMALL;
+    }
+  /* Convert the worse of the two relative errors into a count of
+     incorrect trailing mantissa bits, offset by MAXBIT
+     (FLT_MANT_DIG) + 1.  */
+  rmax = __builtin_fmaxf(rerr, ierr);
+  biterr = 0;
+  if ( rmax != 0.0)
+    {
+      biterr = ilogbf (rmax) + MAXBIT + 1;
+    }
+
+  /* Accept (return 1) only when fewer than ERRLIM bits are in error.  */
+  if (biterr >= ERRLIM)
+    return 0;
+  else
+    return 1;
+}
+
+
+/* Run the complex-divide checker over four float operand sets that are
+   known failure points (extreme exponents) for the naive divide
+   algorithm.  Aborts on any mismatch, exits 0 on success.  */
+int main(int argc, char** argv)
+{
+  float _Complex a,b,c,z;
+  /* Per-case operands: numerator (xr,xi), denominator (yr,yi), and
+     expected quotient (zr,zi).  */
+  float xr[4], xi[4], yr[4], yi[4], zr[4], zi[4];
+  float cr, ci;
+  int i;
+  int ok = 1;
+  xr[0] = 0x1.0b1600p-133;
+  xi[0] = 0x1.5e1c28p+54;
+  yr[0] = -0x1.cdec8cp-119;
+  yi[0] = 0x1.1e72ccp+32;
+  zr[0] = 0x1.38e502p+22;
+  zi[0] = -0x1.f89220p-129;
+
+  xr[1] = -0x1.b1bee2p+121;
+  xi[1] = -0x1.cb403ep-59;
+  yr[1] = 0x1.480000p-144;
+  yi[1] = -0x1.c66fc4p+5;
+  zr[1] = -0x1.60b8cap-34;
+  zi[1] = -0x1.e8b02ap+115;
+
+  xr[2] = -0x1.3f6e00p-97;
+  xi[2] = -0x1.c00000p-146;
+  yr[2] = 0x1.000000p-148;
+  yi[2] = -0x1.0c4e70p-91;
+  zr[2] = 0x1.aa50d0p-55;
+  zi[2] = -0x1.30c746p-6;
+
+  xr[3] = 0x1.000000p-148;
+  xi[3] = 0x1.f4bc04p-84;
+  yr[3] = 0x1.00ad74p-20;
+  yi[3] = 0x1.2ad02ep-85;
+  zr[3] = 0x1.1102ccp-127;
+  zi[3] = 0x1.f369a4p-64;
+
+  for (i = 0; i < 4; i++)
+    {
+      __real__ a = xr[i];
+      __imag__ a = xi[i];
+      __real__ b = yr[i];
+      __imag__ b = yi[i];
+      __real__ z = zr[i];
+      __imag__ z = zi[i];
+      c = a / b;
+      /* cr/ci capture the computed components; they are not read
+         afterwards -- apparently kept as a debugging aid.  */
+      cr = __real__ c;
+      ci = __imag__ c;
+
+      if (!match (c,z)){
+        ok = 0;
+      }
+    }
+  if (!ok)
+    abort ();
+  exit (0);
+}
--- /dev/null
+/*
+ Program to test complex divide for correct results on selected values.
+ Checking known failure points.
+*/
+
+#include <float.h>
+
+extern void abort (void);
+extern void exit (int);
+
+extern int ilogbl (long double);
+int match (long double _Complex,long double _Complex);
+
+#define SMALL LDBL_MIN
+#define MAXBIT LDBL_MANT_DIG
+#define ERRLIM 6
+
+/*
+  Compare c (computed value) with z (expected value).
+  Return 1 if within allowed error range.  Return 0 if not.
+*/
+int match (long double _Complex c,long double _Complex z)
+{
+  long double rz, iz, rc, ic;
+  long double rerr, ierr, rmax;
+  int biterr;
+  /* Split expected and computed values into components.  */
+  rz = __real__ z;
+  iz = __imag__ z;
+  rc = __real__ c;
+  ic = __imag__ c;
+
+  /* Relative error of the real part.  Use the absolute error when the
+     expected value is exactly zero, and divide by SMALL (LDBL_MIN)
+     rather than a subnormal expected value so the ratio stays
+     meaningful.  */
+  if (__builtin_fabsl (rz) > SMALL)
+    {
+      rerr = __builtin_fabsl (rz - rc) / __builtin_fabsl(rz);
+    }
+  else if (__builtin_fabsl (rz) == 0.0)
+    {
+      rerr = __builtin_fabsl (rc);
+    }
+  else
+    {
+      rerr = __builtin_fabsl (rz - rc) / SMALL;
+    }
+
+  /* Same error measure for the imaginary part.  */
+  if (__builtin_fabsl (iz) > SMALL)
+    {
+      ierr = __builtin_fabsl (iz - ic) / __builtin_fabsl(iz);
+    }
+  else if (__builtin_fabsl (iz) == 0.0)
+    {
+      ierr = __builtin_fabsl (ic);
+    }
+  else
+    {
+      ierr = __builtin_fabsl (iz - ic) / SMALL;
+    }
+  /* Convert the worse of the two relative errors into a count of
+     incorrect trailing mantissa bits, offset by MAXBIT
+     (LDBL_MANT_DIG) + 1.  */
+  rmax = __builtin_fmaxl (rerr, ierr);
+  biterr = 0;
+  if ( rmax != 0.0)
+    {
+      biterr = ilogbl (rmax) + MAXBIT + 1;
+    }
+
+  /* Accept (return 1) only when fewer than ERRLIM bits are in error.  */
+  if (biterr >= ERRLIM)
+    return 0;
+  else
+    return 1;
+}
+
+
+/* Run the complex-divide checker over four long double operand sets
+   that are known failure points (extreme exponents) for the naive
+   divide algorithm.  Two data sets are provided, selected by the
+   width of the long double exponent field.  Aborts on any mismatch,
+   exits 0 on success.  */
+int main (int argc, char** argv)
+{
+  long double _Complex a,b,c,z;
+  /* Per-case operands: numerator (xr,xi), denominator (yr,yi), and
+     expected quotient (zr,zi).  */
+  long double xr[4], xi[4], yr[4], yi[4], zr[4], zi[4];
+  long double cr, ci;
+  int i;
+  int ok = 1;
+
+#if (LDBL_MAX_EXP < 2048)
+  /*
+    Test values when the exponent field is 11 or fewer bits
+    (LDBL_MAX_EXP < 2048).  Either LDBL is using DBL on this platform
+    or we are using IBM extended double precision.  Test values will
+    be automatically truncated when the available precision is smaller
+    than the explicit precision.
+  */
+  xr[0] = -0x1.16e7fad79e45ep+651;
+  xi[0] = -0x1.f7f75b94c6c6ap-860;
+  yr[0] = -0x1.2f40d8ff7e55ep+245;
+  yi[0] = -0x0.0000000004ebcp-968;
+  zr[0] = 0x1.d6e4b0e2828694570ba839070beep+405L;
+  zi[0] = -0x1.e9095e311e70498db810196259b7p-846L;
+
+  xr[1] = -0x1.21ff587f953d3p-310;
+  xi[1] = -0x1.5a526dcc59960p+837;
+  yr[1] = 0x1.b88b8b552eaadp+735;
+  yi[1] = -0x1.873e2d6544d92p-327;
+  zr[1] = 0x1.65734a88b2ddff699c482ee8eef6p-961L;
+  zi[1] = -0x1.927e85b8b576f94a797a1bcb733dp+101L;
+
+  xr[2] = 0x1.4612e41aa8080p-846;
+  xi[2] = -0x0.0000000613e07p-968;
+  yr[2] = 0x1.df9cd0d58caafp-820;
+  yi[2] = -0x1.e47051a9036dbp-584;
+  zr[2] = 0x1.9b194f3aaadea545174c5372d8p-415L;
+  zi[2] = 0x1.58a00ab740a6ad3249002f2b79p-263L;
+
+  xr[3] = 0x1.cb27eece7c585p-355;
+  xi[3] = 0x0.000000223b8a8p-968;
+  yr[3] = -0x1.74e7ed2b9189fp-22;
+  yi[3] = 0x1.3d80439e9a119p-731;
+  zr[3] = -0x1.3b35ed806ae5a2a8cc1c9a96931dp-333L;
+  zi[3] = -0x1.7802c17c774895bd541adeb200p-974L;
+#else
+  /*
+    Test values intended for either IEEE128 or Intel80 formats.  In
+    either case, 15 bits of exponent are available.  Test values will
+    be automatically truncated when the available precision is smaller
+    than the explicit precision.
+  */
+  xr[0] = -0x9.c793985b7d029d90p-8480L;
+  xi[0] = 0x8.018745ffa61a8fe0p+16329L;
+  yr[0] = -0xe.d5bee9c523a35ad0p-15599L;
+  yi[0] = -0xa.8c93c5a4f94128f0p+869L;
+  zr[0] = -0x1.849178451c035b95d16311d0efdap+15459L;
+  zi[0] = -0x1.11375ed2c1f58b9d047ab64aed97p-1008L;
+
+  xr[1] = 0xb.68e44bc6d0b91a30p+16026L;
+  xi[1] = 0xb.ab10f5453e972f30p-14239L;
+  yr[1] = 0x8.8cbd470705428ff0p-16350L;
+  yi[1] = -0xa.0c1cbeae4e4b69f0p+347L;
+  zr[1] = 0x1.eec40848785e500d9f0945ab58d3p-1019L;
+  zi[1] = 0x1.22b6b579927a3f238b772bb6dc95p+15679L;
+
+  xr[2] = -0x9.e8c093a43b546a90p+15983L;
+  xi[2] = 0xc.95b18274208311e0p-2840L;
+  yr[2] = -0x8.dedb729b5c1b2ec0p+8L;
+  yi[2] = 0xa.a49fb81b24738370p-16385L;
+  zr[2] = 0x1.1df99ee89bb118f3201369e06576p+15975L;
+  zi[2] = 0x1.571e7ef904d6b6eee7acb0dcf098p-418L;
+
+  xr[3] = 0xc.4687f251c0f48bd0p-3940L;
+  xi[3] = -0xe.a3f2138992d85fa0p+15598L;
+  yr[3] = 0xe.4b0c25c3d5ebb830p-16344L;
+  yi[3] = -0xa.6cbf1ba80f7b97a0p+78L;
+  zr[3] = 0x1.6785ba23bfb744cee97b4142348bp+15520L;
+  zi[3] = -0x1.ecee7b8c7bdd36237eb538324289p-902L;
+#endif
+
+  for (i = 0; i < 4; i++)
+    {
+      __real__ a = xr[i];
+      __imag__ a = xi[i];
+      __real__ b = yr[i];
+      __imag__ b = yi[i];
+      __real__ z = zr[i];
+      __imag__ z = zi[i];
+      c = a / b;
+      /* cr/ci capture the computed components; they are not read
+         afterwards -- apparently kept as a debugging aid.  */
+      cr = __real__ c;
+      ci = __imag__ c;
+
+      if (!match (c,z)){
+        ok = 0;
+      }
+    }
+  if (!ok)
+    abort ();
+  exit (0);
+}
#define __divkc3 __divkc3_sw
#endif
+#ifndef __LONG_DOUBLE_IEEE128__
+#define RBIG (__LIBGCC_KF_MAX__ / 2)
+#define RMIN (__LIBGCC_KF_MIN__)
+#define RMIN2 (__LIBGCC_KF_EPSILON__)
+#define RMINSCAL (1 / __LIBGCC_KF_EPSILON__)
+#define RMAX2 (RBIG * RMIN2)
+#else
+#define RBIG (__LIBGCC_TF_MAX__ / 2)
+#define RMIN (__LIBGCC_TF_MIN__)
+#define RMIN2 (__LIBGCC_TF_EPSILON__)
+#define RMINSCAL (1 / __LIBGCC_TF_EPSILON__)
+#define RMAX2 (RBIG * RMIN2)
+#endif
+
TCtype
__divkc3 (TFtype a, TFtype b, TFtype c, TFtype d)
{
TFtype denom, ratio, x, y;
TCtype res;
- /* ??? We can get better behavior from logarithmic scaling instead of
- the division. But that would mean starting to link libgcc against
- libm. We could implement something akin to ldexp/frexp as gcc builtins
- fairly easily... */
+ /* long double has significant potential underflow/overflow errors that
+ can be greatly reduced with a limited number of tests and adjustments.
+ */
+
+ /* Scale by max(c,d) to reduce chances of denominator overflowing. */
if (FABS (c) < FABS (d))
{
+ /* Prevent underflow when denominator is near max representable. */
+ if (FABS (d) >= RBIG)
+ {
+ a = a / 2;
+ b = b / 2;
+ c = c / 2;
+ d = d / 2;
+ }
+ /* Avoid overflow/underflow issues when c and d are small.
+ Scaling up helps avoid some underflows.
+ No new overflow possible since c&d < RMIN2. */
+ if (FABS (d) < RMIN2)
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ else
+ {
+ if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2))
+ || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+ && (FABS (d) < RMAX2)))
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ }
ratio = c / d;
denom = (c * ratio) + d;
- x = ((a * ratio) + b) / denom;
- y = ((b * ratio) - a) / denom;
+ /* Choose alternate order of computation if ratio is subnormal. */
+ if (FABS (ratio) > RMIN)
+ {
+ x = ((a * ratio) + b) / denom;
+ y = ((b * ratio) - a) / denom;
+ }
+ else
+ {
+ x = ((c * (a / d)) + b) / denom;
+ y = ((c * (b / d)) - a) / denom;
+ }
}
else
{
+ /* Prevent underflow when denominator is near max representable. */
+ if (FABS (c) >= RBIG)
+ {
+ a = a / 2;
+ b = b / 2;
+ c = c / 2;
+ d = d / 2;
+ }
+ /* Avoid overflow/underflow issues when both c and d are small.
+ Scaling up helps avoid some underflows.
+ No new overflow possible since both c&d are less than RMIN2. */
+ if (FABS (c) < RMIN2)
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ else
+ {
+ if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (c) < RMAX2))
+ || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+ && (FABS (c) < RMAX2)))
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ }
ratio = d / c;
denom = (d * ratio) + c;
- x = ((b * ratio) + a) / denom;
- y = (b - (a * ratio)) / denom;
+ /* Choose alternate order of computation if ratio is subnormal. */
+ if (FABS (ratio) > RMIN)
+ {
+ x = ((b * ratio) + a) / denom;
+ y = (b - (a * ratio)) / denom;
+ }
+ else
+ {
+ x = (a + (d * (b / c))) / denom;
+ y = (b - (d * (a / c))) / denom;
+ }
}
/* Recover infinities and zeros that computed as NaN+iNaN; the only cases
#if defined(L_mulhc3) || defined(L_divhc3)
# define MTYPE HFtype
# define CTYPE HCtype
+# define AMTYPE SFtype
# define MODE hc
# define CEXT __LIBGCC_HF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_HF_EXCESS_PRECISION__)
#elif defined(L_mulsc3) || defined(L_divsc3)
# define MTYPE SFtype
# define CTYPE SCtype
+# define AMTYPE DFtype
# define MODE sc
# define CEXT __LIBGCC_SF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_SF_EXCESS_PRECISION__)
+# define RBIG (__LIBGCC_SF_MAX__ / 2)
+# define RMIN (__LIBGCC_SF_MIN__)
+# define RMIN2 (__LIBGCC_SF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_SF_EPSILON__)
+# define RMAX2 (RBIG * RMIN2)
#elif defined(L_muldc3) || defined(L_divdc3)
# define MTYPE DFtype
# define CTYPE DCtype
# define MODE dc
# define CEXT __LIBGCC_DF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_DF_EXCESS_PRECISION__)
+# define RBIG (__LIBGCC_DF_MAX__ / 2)
+# define RMIN (__LIBGCC_DF_MIN__)
+# define RMIN2 (__LIBGCC_DF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_DF_EPSILON__)
+# define RMAX2 (RBIG * RMIN2)
#elif defined(L_mulxc3) || defined(L_divxc3)
# define MTYPE XFtype
# define CTYPE XCtype
# define MODE xc
# define CEXT __LIBGCC_XF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_XF_EXCESS_PRECISION__)
+# define RBIG (__LIBGCC_XF_MAX__ / 2)
+# define RMIN (__LIBGCC_XF_MIN__)
+# define RMIN2 (__LIBGCC_XF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_XF_EPSILON__)
+# define RMAX2 (RBIG * RMIN2)
#elif defined(L_multc3) || defined(L_divtc3)
# define MTYPE TFtype
# define CTYPE TCtype
# define MODE tc
# define CEXT __LIBGCC_TF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_TF_EXCESS_PRECISION__)
+# define RBIG (__LIBGCC_TF_MAX__ / 2)
+# define RMIN (__LIBGCC_TF_MIN__)
+# define RMIN2 (__LIBGCC_TF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_TF_EPSILON__)
+# define RMAX2 (RBIG * RMIN2)
#else
# error
#endif
CTYPE
CONCAT3(__div,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
{
+#if defined(L_divhc3) \
+ || (defined(L_divsc3) && defined(__LIBGCC_HAVE_HWDBL__) )
+
+ /* Half precision is handled with float precision.
+ float is handled with double precision when double precision
+ hardware is available.
+ Due to the additional precision, the simple complex divide
+ method (without Smith's method) is sufficient to get accurate
+ answers and runs slightly faster than Smith's method. */
+
+ AMTYPE aa, bb, cc, dd;
+ AMTYPE denom;
+ MTYPE x, y;
+ CTYPE res;
+ aa = a;
+ bb = b;
+ cc = c;
+ dd = d;
+
+ denom = (cc * cc) + (dd * dd);
+ x = ((aa * cc) + (bb * dd)) / denom;
+ y = ((bb * cc) - (aa * dd)) / denom;
+
+#else
MTYPE denom, ratio, x, y;
CTYPE res;
- /* ??? We can get better behavior from logarithmic scaling instead of
- the division. But that would mean starting to link libgcc against
- libm. We could implement something akin to ldexp/frexp as gcc builtins
- fairly easily... */
+ /* double, extended, long double have significant potential
+ underflow/overflow errors that can be greatly reduced with
+ a limited number of tests and adjustments. float is handled
+ the same way when no HW double is available.
+ */
+
+ /* Scale by max(c,d) to reduce chances of denominator overflowing. */
if (FABS (c) < FABS (d))
{
+ /* Prevent underflow when denominator is near max representable. */
+ if (FABS (d) >= RBIG)
+ {
+ a = a / 2;
+ b = b / 2;
+ c = c / 2;
+ d = d / 2;
+ }
+ /* Avoid overflow/underflow issues when c and d are small.
+ Scaling up helps avoid some underflows.
+ No new overflow possible since c&d < RMIN2. */
+ if (FABS (d) < RMIN2)
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ else
+ {
+ if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2))
+ || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+ && (FABS (d) < RMAX2)))
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ }
ratio = c / d;
denom = (c * ratio) + d;
- x = ((a * ratio) + b) / denom;
- y = ((b * ratio) - a) / denom;
+ /* Choose alternate order of computation if ratio is subnormal. */
+ if (FABS (ratio) > RMIN)
+ {
+ x = ((a * ratio) + b) / denom;
+ y = ((b * ratio) - a) / denom;
+ }
+ else
+ {
+ x = ((c * (a / d)) + b) / denom;
+ y = ((c * (b / d)) - a) / denom;
+ }
}
else
{
+ /* Prevent underflow when denominator is near max representable. */
+ if (FABS (c) >= RBIG)
+ {
+ a = a / 2;
+ b = b / 2;
+ c = c / 2;
+ d = d / 2;
+ }
+ /* Avoid overflow/underflow issues when both c and d are small.
+ Scaling up helps avoid some underflows.
+ No new overflow possible since both c&d are less than RMIN2. */
+ if (FABS (c) < RMIN2)
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ else
+ {
+ if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (c) < RMAX2))
+ || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+ && (FABS (c) < RMAX2)))
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ }
ratio = d / c;
denom = (d * ratio) + c;
- x = ((b * ratio) + a) / denom;
- y = (b - (a * ratio)) / denom;
+ /* Choose alternate order of computation if ratio is subnormal. */
+ if (FABS (ratio) > RMIN)
+ {
+ x = ((b * ratio) + a) / denom;
+ y = (b - (a * ratio)) / denom;
+ }
+ else
+ {
+ x = (a + (d * (b / c))) / denom;
+ y = (b - (d * (a / c))) / denom;
+ }
}
+#endif
- /* Recover infinities and zeros that computed as NaN+iNaN; the only cases
- are nonzero/zero, infinite/finite, and finite/infinite. */
+ /* Recover infinities and zeros that computed as NaN+iNaN; the only
+ cases are nonzero/zero, infinite/finite, and finite/infinite. */
if (isnan (x) && isnan (y))
{
if (c == 0.0 && d == 0.0 && (!isnan (a) || !isnan (b)))