{
scalar_float_mode mode = mode_iter.require ();
const char *name = GET_MODE_NAME (mode);
+ const size_t name_len = strlen (name);
+ char float_h_prefix[16] = "";
char *macro_name
- = (char *) alloca (strlen (name)
- + sizeof ("__LIBGCC__MANT_DIG__"));
+ = XALLOCAVEC (char, name_len + sizeof ("__LIBGCC__MANT_DIG__"));
sprintf (macro_name, "__LIBGCC_%s_MANT_DIG__", name);
builtin_define_with_int_value (macro_name,
REAL_MODE_FORMAT (mode)->p);
if (!targetm.scalar_mode_supported_p (mode)
|| !targetm.libgcc_floating_mode_supported_p (mode))
continue;
- macro_name = (char *) alloca (strlen (name)
- + sizeof ("__LIBGCC_HAS__MODE__"));
+ macro_name = XALLOCAVEC (char, name_len
+ + sizeof ("__LIBGCC_HAS__MODE__"));
sprintf (macro_name, "__LIBGCC_HAS_%s_MODE__", name);
cpp_define (pfile, macro_name);
- macro_name = (char *) alloca (strlen (name)
- + sizeof ("__LIBGCC__FUNC_EXT__"));
+ macro_name = XALLOCAVEC (char, name_len
+ + sizeof ("__LIBGCC__FUNC_EXT__"));
sprintf (macro_name, "__LIBGCC_%s_FUNC_EXT__", name);
char suffix[20] = "";
if (mode == TYPE_MODE (double_type_node))
- ; /* Empty suffix correct. */
+ {
+ /* Empty suffix correct. */
+ memcpy (float_h_prefix, "DBL", 4);
+ }
else if (mode == TYPE_MODE (float_type_node))
- suffix[0] = 'f';
+ {
+ suffix[0] = 'f';
+ memcpy (float_h_prefix, "FLT", 4);
+ }
else if (mode == TYPE_MODE (long_double_type_node))
- suffix[0] = 'l';
+ {
+ suffix[0] = 'l';
+ memcpy (float_h_prefix, "LDBL", 5);
+ }
else
{
bool found_suffix = false;
sprintf (suffix, "f%d%s", floatn_nx_types[i].n,
floatn_nx_types[i].extended ? "x" : "");
found_suffix = true;
+ sprintf (float_h_prefix, "FLT%d%s", floatn_nx_types[i].n,
+ floatn_nx_types[i].extended ? "X" : "");
break;
}
gcc_assert (found_suffix);
default:
gcc_unreachable ();
}
- macro_name = (char *) alloca (strlen (name)
- + sizeof ("__LIBGCC__EXCESS_"
- "PRECISION__"));
+ macro_name = XALLOCAVEC (char, name_len
+ + sizeof ("__LIBGCC__EXCESS_PRECISION__"));
sprintf (macro_name, "__LIBGCC_%s_EXCESS_PRECISION__", name);
builtin_define_with_int_value (macro_name, excess_precision);
+
+ char val_name[64];
+
+ macro_name = XALLOCAVEC (char, name_len
+ + sizeof ("__LIBGCC__EPSILON__"));
+ sprintf (macro_name, "__LIBGCC_%s_EPSILON__", name);
+ sprintf (val_name, "__%s_EPSILON__", float_h_prefix);
+ builtin_define_with_value (macro_name, val_name, 0);
+
+ macro_name = XALLOCAVEC (char, name_len + sizeof ("__LIBGCC__MAX__"));
+ sprintf (macro_name, "__LIBGCC_%s_MAX__", name);
+ sprintf (val_name, "__%s_MAX__", float_h_prefix);
+ builtin_define_with_value (macro_name, val_name, 0);
+
+ macro_name = XALLOCAVEC (char, name_len + sizeof ("__LIBGCC__MIN__"));
+ sprintf (macro_name, "__LIBGCC_%s_MIN__", name);
+ sprintf (val_name, "__%s_MIN__", float_h_prefix);
+ builtin_define_with_value (macro_name, val_name, 0);
+
+#ifdef HAVE_adddf3
+ builtin_define_with_int_value ("__LIBGCC_HAVE_HWDBL__",
+ HAVE_adddf3);
+#endif
}
/* For libgcc crtstuff.c and libgcc2.c. */
--- /dev/null
+/*
+ Program to test complex divide for correct results on selected values.
+ Checking known failure points.
+*/
+
+#include <float.h>
+
+extern void abort (void);
+extern void exit (int);
+
+extern int ilogb (double);
+int match (double _Complex, double _Complex);
+
+#define SMALL DBL_MIN
+#define MAXBIT DBL_MANT_DIG
+#define ERRLIM 6
+
+/*
+  Compare c (computed value) with z (expected value).
+  Return 1 if within allowed error range.  Return 0 if not.
+*/
+int match (double _Complex c, double _Complex z)
+{
+  double rz, iz, rc, ic;
+  double rerr, ierr, rmax;
+  int biterr;
+  /* Split expected and computed values into components.  */
+  rz = __real__ z;
+  iz = __imag__ z;
+  rc = __real__ c;
+  ic = __imag__ c;
+
+  /* Relative error of the real part.  Use the absolute error when the
+     expected value is exactly zero, and divide by SMALL (DBL_MIN)
+     rather than a subnormal expected value so the ratio stays
+     meaningful.  */
+  if (__builtin_fabs (rz) > SMALL)
+    {
+      rerr = __builtin_fabs (rz - rc) / __builtin_fabs (rz);
+    }
+  else if (__builtin_fabs (rz) == 0.0)
+    {
+      rerr = __builtin_fabs (rc);
+    }
+  else
+    {
+      rerr = __builtin_fabs (rz - rc) / SMALL;
+    }
+
+  /* Same error measure for the imaginary part.  */
+  if (__builtin_fabs (iz) > SMALL)
+    {
+      ierr = __builtin_fabs (iz - ic) / __builtin_fabs (iz);
+    }
+  else if (__builtin_fabs (iz) == 0.0)
+    {
+      ierr = __builtin_fabs (ic);
+    }
+  else
+    {
+      ierr = __builtin_fabs (iz - ic) / SMALL;
+    }
+  /* Convert the worse of the two relative errors into a count of
+     incorrect trailing mantissa bits: ilogb (rmax) is the (negative)
+     magnitude of the error, offset by MAXBIT (DBL_MANT_DIG) + 1.  */
+  rmax = __builtin_fmax(rerr, ierr);
+  biterr = 0;
+  if ( rmax != 0.0)
+    {
+      biterr = ilogb (rmax) + MAXBIT + 1;
+    }
+
+  /* Accept (return 1) only when fewer than ERRLIM bits are in error.  */
+  if (biterr >= ERRLIM)
+    return 0;
+  else
+    return 1;
+}
+
+
+/* Run the complex-divide checker over four operand sets that are known
+   failure points (extreme exponents) for the naive divide algorithm.
+   Aborts on any mismatch, exits 0 on success.  */
+int main (int argc, char** argv)
+{
+  double _Complex a,b,c,z;
+  /* Per-case operands: x + iy is divided by (yr + i*yi); z is the
+     expected quotient.  */
+  double xr[4], xi[4], yr[4], yi[4], zr[4], zi[4];
+  double cr, ci;
+  int i;
+  int ok = 1;
+  xr[0] = -0x1.16e7fad79e45ep+651;
+  xi[0] = -0x1.f7f75b94c6c6ap-860;
+  yr[0] = -0x1.2f40d8ff7e55ep+245;
+  yi[0] = -0x0.0000000004ebcp-1022;
+  zr[0] = 0x1.d6e4b0e282869p+405;
+  zi[0] = -0x1.e9095e311e706p-900;
+
+  xr[1] = -0x1.21ff587f953d3p-310;
+  xi[1] = -0x1.5a526dcc59960p+837;
+  yr[1] = 0x1.b88b8b552eaadp+735;
+  yi[1] = -0x1.873e2d6544d92p-327;
+  zr[1] = 0x1.65734a88b2de0p-961;
+  zi[1] = -0x1.927e85b8b5770p+101;
+
+  xr[2] = 0x1.4612e41aa8080p-846;
+  xi[2] = -0x0.0000000613e07p-1022;
+  yr[2] = 0x1.df9cd0d58caafp-820;
+  yi[2] = -0x1.e47051a9036dbp-584;
+  zr[2] = 0x1.9b194f3fffa32p-469;
+  zi[2] = 0x1.58a00ab740a6bp-263;
+
+  xr[3] = 0x1.cb27eece7c585p-355;
+  xi[3] = 0x0.000000223b8a8p-1022;
+  yr[3] = -0x1.74e7ed2b9189fp-22;
+  yi[3] = 0x1.3d80439e9a119p-731;
+  zr[3] = -0x1.3b35ed806ae5ap-333;
+  zi[3] = -0x0.05e01bcbfd9f6p-1022;
+
+
+  for (i = 0; i < 4; i++)
+    {
+      __real__ a = xr[i];
+      __imag__ a = xi[i];
+      __real__ b = yr[i];
+      __imag__ b = yi[i];
+      __real__ z = zr[i];
+      __imag__ z = zi[i];
+      c = a / b;
+      /* cr/ci capture the computed components; they are not read
+         afterwards -- apparently kept as a debugging aid.  */
+      cr = __real__ c;
+      ci = __imag__ c;
+
+      if (!match (c,z)){
+        ok = 0;
+      }
+    }
+  if (!ok)
+    abort ();
+  exit (0);
+}
--- /dev/null
+/*
+ Program to test complex divide for correct results on selected values.
+ Checking known failure points.
+*/
+
+#include <float.h>
+
+extern void abort (void);
+extern void exit (int);
+
+extern int ilogbf (float);
+int match (float _Complex, float _Complex);
+
+#define SMALL FLT_MIN
+#define MAXBIT FLT_MANT_DIG
+#define ERRLIM 6
+
+/*
+  Compare c (computed value) with z (expected value).
+  Return 1 if within allowed error range.  Return 0 if not.
+*/
+int match (float _Complex c, float _Complex z)
+{
+  float rz, iz, rc, ic;
+  float rerr, ierr, rmax;
+  int biterr;
+  /* Split expected and computed values into components.  */
+  rz = __real__ z;
+  iz = __imag__ z;
+  rc = __real__ c;
+  ic = __imag__ c;
+
+  /* Relative error of the real part.  Use the absolute error when the
+     expected value is exactly zero, and divide by SMALL (FLT_MIN)
+     rather than a subnormal expected value so the ratio stays
+     meaningful.  */
+  if (__builtin_fabsf (rz) > SMALL)
+    {
+      rerr = __builtin_fabsf (rz - rc) / __builtin_fabsf (rz);
+    }
+  else if (__builtin_fabsf (rz) == 0.0)
+    {
+      rerr = __builtin_fabsf (rc);
+    }
+  else
+    {
+      rerr = __builtin_fabsf (rz - rc) / SMALL;
+    }
+
+  /* Same error measure for the imaginary part.  */
+  if (__builtin_fabsf (iz) > SMALL)
+    {
+      ierr = __builtin_fabsf (iz - ic) / __builtin_fabsf (iz);
+    }
+  else if (__builtin_fabsf (iz) == 0.0)
+    {
+      ierr = __builtin_fabsf (ic);
+    }
+  else
+    {
+      ierr = __builtin_fabsf (iz - ic) / SMALL;
+    }
+  /* Convert the worse of the two relative errors into a count of
+     incorrect trailing mantissa bits, offset by MAXBIT
+     (FLT_MANT_DIG) + 1.  */
+  rmax = __builtin_fmaxf(rerr, ierr);
+  biterr = 0;
+  if ( rmax != 0.0)
+    {
+      biterr = ilogbf (rmax) + MAXBIT + 1;
+    }
+
+  /* Accept (return 1) only when fewer than ERRLIM bits are in error.  */
+  if (biterr >= ERRLIM)
+    return 0;
+  else
+    return 1;
+}
+
+
+/* Run the complex-divide checker over four float operand sets that are
+   known failure points (extreme exponents) for the naive divide
+   algorithm.  Aborts on any mismatch, exits 0 on success.  */
+int main(int argc, char** argv)
+{
+  float _Complex a,b,c,z;
+  /* Per-case operands: numerator (xr,xi), denominator (yr,yi), and
+     expected quotient (zr,zi).  */
+  float xr[4], xi[4], yr[4], yi[4], zr[4], zi[4];
+  float cr, ci;
+  int i;
+  int ok = 1;
+  xr[0] = 0x1.0b1600p-133;
+  xi[0] = 0x1.5e1c28p+54;
+  yr[0] = -0x1.cdec8cp-119;
+  yi[0] = 0x1.1e72ccp+32;
+  zr[0] = 0x1.38e502p+22;
+  zi[0] = -0x1.f89220p-129;
+
+  xr[1] = -0x1.b1bee2p+121;
+  xi[1] = -0x1.cb403ep-59;
+  yr[1] = 0x1.480000p-144;
+  yi[1] = -0x1.c66fc4p+5;
+  zr[1] = -0x1.60b8cap-34;
+  zi[1] = -0x1.e8b02ap+115;
+
+  xr[2] = -0x1.3f6e00p-97;
+  xi[2] = -0x1.c00000p-146;
+  yr[2] = 0x1.000000p-148;
+  yi[2] = -0x1.0c4e70p-91;
+  zr[2] = 0x1.aa50d0p-55;
+  zi[2] = -0x1.30c746p-6;
+
+  xr[3] = 0x1.000000p-148;
+  xi[3] = 0x1.f4bc04p-84;
+  yr[3] = 0x1.00ad74p-20;
+  yi[3] = 0x1.2ad02ep-85;
+  zr[3] = 0x1.1102ccp-127;
+  zi[3] = 0x1.f369a4p-64;
+
+  for (i = 0; i < 4; i++)
+    {
+      __real__ a = xr[i];
+      __imag__ a = xi[i];
+      __real__ b = yr[i];
+      __imag__ b = yi[i];
+      __real__ z = zr[i];
+      __imag__ z = zi[i];
+      c = a / b;
+      /* cr/ci capture the computed components; they are not read
+         afterwards -- apparently kept as a debugging aid.  */
+      cr = __real__ c;
+      ci = __imag__ c;
+
+      if (!match (c,z)){
+        ok = 0;
+      }
+    }
+  if (!ok)
+    abort ();
+  exit (0);
+}
--- /dev/null
+/*
+ Program to test complex divide for correct results on selected values.
+ Checking known failure points.
+*/
+
+#include <float.h>
+
+extern void abort (void);
+extern void exit (int);
+
+extern int ilogbl (long double);
+int match (long double _Complex,long double _Complex);
+
+#define SMALL LDBL_MIN
+#define MAXBIT LDBL_MANT_DIG
+#define ERRLIM 6
+
+/*
+  Compare c (computed value) with z (expected value).
+  Return 1 if within allowed error range.  Return 0 if not.
+*/
+int match (long double _Complex c,long double _Complex z)
+{
+  long double rz, iz, rc, ic;
+  long double rerr, ierr, rmax;
+  int biterr;
+  /* Split expected and computed values into components.  */
+  rz = __real__ z;
+  iz = __imag__ z;
+  rc = __real__ c;
+  ic = __imag__ c;
+
+  /* Relative error of the real part.  Use the absolute error when the
+     expected value is exactly zero, and divide by SMALL (LDBL_MIN)
+     rather than a subnormal expected value so the ratio stays
+     meaningful.  */
+  if (__builtin_fabsl (rz) > SMALL)
+    {
+      rerr = __builtin_fabsl (rz - rc) / __builtin_fabsl(rz);
+    }
+  else if (__builtin_fabsl (rz) == 0.0)
+    {
+      rerr = __builtin_fabsl (rc);
+    }
+  else
+    {
+      rerr = __builtin_fabsl (rz - rc) / SMALL;
+    }
+
+  /* Same error measure for the imaginary part.  */
+  if (__builtin_fabsl (iz) > SMALL)
+    {
+      ierr = __builtin_fabsl (iz - ic) / __builtin_fabsl(iz);
+    }
+  else if (__builtin_fabsl (iz) == 0.0)
+    {
+      ierr = __builtin_fabsl (ic);
+    }
+  else
+    {
+      ierr = __builtin_fabsl (iz - ic) / SMALL;
+    }
+  /* Convert the worse of the two relative errors into a count of
+     incorrect trailing mantissa bits, offset by MAXBIT
+     (LDBL_MANT_DIG) + 1.  */
+  rmax = __builtin_fmaxl (rerr, ierr);
+  biterr = 0;
+  if ( rmax != 0.0)
+    {
+      biterr = ilogbl (rmax) + MAXBIT + 1;
+    }
+
+  /* Accept (return 1) only when fewer than ERRLIM bits are in error.  */
+  if (biterr >= ERRLIM)
+    return 0;
+  else
+    return 1;
+}
+
+
+/* Run the complex-divide checker over four long double operand sets
+   that are known failure points (extreme exponents) for the naive
+   divide algorithm.  Two data sets are provided, selected by the
+   width of the long double exponent field.  Aborts on any mismatch,
+   exits 0 on success.  */
+int main (int argc, char** argv)
+{
+  long double _Complex a,b,c,z;
+  /* Per-case operands: numerator (xr,xi), denominator (yr,yi), and
+     expected quotient (zr,zi).  */
+  long double xr[4], xi[4], yr[4], yi[4], zr[4], zi[4];
+  long double cr, ci;
+  int i;
+  int ok = 1;
+
+#if (LDBL_MAX_EXP < 2048)
+  /*
+    Test values when the exponent field is 11 or fewer bits
+    (LDBL_MAX_EXP < 2048).  Either LDBL is using DBL on this platform
+    or we are using IBM extended double precision.  Test values will
+    be automatically truncated when the available precision is smaller
+    than the explicit precision.
+  */
+  xr[0] = -0x1.16e7fad79e45ep+651;
+  xi[0] = -0x1.f7f75b94c6c6ap-860;
+  yr[0] = -0x1.2f40d8ff7e55ep+245;
+  yi[0] = -0x0.0000000004ebcp-968;
+  zr[0] = 0x1.d6e4b0e2828694570ba839070beep+405L;
+  zi[0] = -0x1.e9095e311e70498db810196259b7p-846L;
+
+  xr[1] = -0x1.21ff587f953d3p-310;
+  xi[1] = -0x1.5a526dcc59960p+837;
+  yr[1] = 0x1.b88b8b552eaadp+735;
+  yi[1] = -0x1.873e2d6544d92p-327;
+  zr[1] = 0x1.65734a88b2ddff699c482ee8eef6p-961L;
+  zi[1] = -0x1.927e85b8b576f94a797a1bcb733dp+101L;
+
+  xr[2] = 0x1.4612e41aa8080p-846;
+  xi[2] = -0x0.0000000613e07p-968;
+  yr[2] = 0x1.df9cd0d58caafp-820;
+  yi[2] = -0x1.e47051a9036dbp-584;
+  zr[2] = 0x1.9b194f3aaadea545174c5372d8p-415L;
+  zi[2] = 0x1.58a00ab740a6ad3249002f2b79p-263L;
+
+  xr[3] = 0x1.cb27eece7c585p-355;
+  xi[3] = 0x0.000000223b8a8p-968;
+  yr[3] = -0x1.74e7ed2b9189fp-22;
+  yi[3] = 0x1.3d80439e9a119p-731;
+  zr[3] = -0x1.3b35ed806ae5a2a8cc1c9a96931dp-333L;
+  zi[3] = -0x1.7802c17c774895bd541adeb200p-974L;
+#else
+  /*
+    Test values intended for either IEEE128 or Intel80 formats.  In
+    either case, 15 bits of exponent are available.  Test values will
+    be automatically truncated when the available precision is smaller
+    than the explicit precision.
+  */
+  xr[0] = -0x9.c793985b7d029d90p-8480L;
+  xi[0] = 0x8.018745ffa61a8fe0p+16329L;
+  yr[0] = -0xe.d5bee9c523a35ad0p-15599L;
+  yi[0] = -0xa.8c93c5a4f94128f0p+869L;
+  zr[0] = -0x1.849178451c035b95d16311d0efdap+15459L;
+  zi[0] = -0x1.11375ed2c1f58b9d047ab64aed97p-1008L;
+
+  xr[1] = 0xb.68e44bc6d0b91a30p+16026L;
+  xi[1] = 0xb.ab10f5453e972f30p-14239L;
+  yr[1] = 0x8.8cbd470705428ff0p-16350L;
+  yi[1] = -0xa.0c1cbeae4e4b69f0p+347L;
+  zr[1] = 0x1.eec40848785e500d9f0945ab58d3p-1019L;
+  zi[1] = 0x1.22b6b579927a3f238b772bb6dc95p+15679L;
+
+  xr[2] = -0x9.e8c093a43b546a90p+15983L;
+  xi[2] = 0xc.95b18274208311e0p-2840L;
+  yr[2] = -0x8.dedb729b5c1b2ec0p+8L;
+  yi[2] = 0xa.a49fb81b24738370p-16385L;
+  zr[2] = 0x1.1df99ee89bb118f3201369e06576p+15975L;
+  zi[2] = 0x1.571e7ef904d6b6eee7acb0dcf098p-418L;
+
+  xr[3] = 0xc.4687f251c0f48bd0p-3940L;
+  xi[3] = -0xe.a3f2138992d85fa0p+15598L;
+  yr[3] = 0xe.4b0c25c3d5ebb830p-16344L;
+  yi[3] = -0xa.6cbf1ba80f7b97a0p+78L;
+  zr[3] = 0x1.6785ba23bfb744cee97b4142348bp+15520L;
+  zi[3] = -0x1.ecee7b8c7bdd36237eb538324289p-902L;
+#endif
+
+  for (i = 0; i < 4; i++)
+    {
+      __real__ a = xr[i];
+      __imag__ a = xi[i];
+      __real__ b = yr[i];
+      __imag__ b = yi[i];
+      __real__ z = zr[i];
+      __imag__ z = zi[i];
+      c = a / b;
+      /* cr/ci capture the computed components; they are not read
+         afterwards -- apparently kept as a debugging aid.  */
+      cr = __real__ c;
+      ci = __imag__ c;
+
+      if (!match (c,z)){
+        ok = 0;
+      }
+    }
+  if (!ok)
+    abort ();
+  exit (0);
+}
#define __divkc3 __divkc3_sw
#endif
+#ifndef __LONG_DOUBLE_IEEE128__
+#define RBIG (__LIBGCC_KF_MAX__ / 2)
+#define RMIN (__LIBGCC_KF_MIN__)
+#define RMIN2 (__LIBGCC_KF_EPSILON__)
+#define RMINSCAL (1 / __LIBGCC_KF_EPSILON__)
+#define RMAX2 (RBIG * RMIN2)
+#else
+#define RBIG (__LIBGCC_TF_MAX__ / 2)
+#define RMIN (__LIBGCC_TF_MIN__)
+#define RMIN2 (__LIBGCC_TF_EPSILON__)
+#define RMINSCAL (1 / __LIBGCC_TF_EPSILON__)
+#define RMAX2 (RBIG * RMIN2)
+#endif
+
TCtype
__divkc3 (TFtype a, TFtype b, TFtype c, TFtype d)
{
TFtype denom, ratio, x, y;
TCtype res;
- /* ??? We can get better behavior from logarithmic scaling instead of
- the division. But that would mean starting to link libgcc against
- libm. We could implement something akin to ldexp/frexp as gcc builtins
- fairly easily... */
+ /* long double has significant potential underflow/overflow errors that
+ can be greatly reduced with a limited number of tests and adjustments.
+ */
+
+ /* Scale by max(c,d) to reduce chances of denominator overflowing. */
if (FABS (c) < FABS (d))
{
+ /* Prevent underflow when denominator is near max representable. */
+ if (FABS (d) >= RBIG)
+ {
+ a = a / 2;
+ b = b / 2;
+ c = c / 2;
+ d = d / 2;
+ }
+ /* Avoid overflow/underflow issues when c and d are small.
+ Scaling up helps avoid some underflows.
+ No new overflow possible since c&d < RMIN2. */
+ if (FABS (d) < RMIN2)
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ else
+ {
+ if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2))
+ || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+ && (FABS (d) < RMAX2)))
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ }
ratio = c / d;
denom = (c * ratio) + d;
- x = ((a * ratio) + b) / denom;
- y = ((b * ratio) - a) / denom;
+ /* Choose alternate order of computation if ratio is subnormal. */
+ if (FABS (ratio) > RMIN)
+ {
+ x = ((a * ratio) + b) / denom;
+ y = ((b * ratio) - a) / denom;
+ }
+ else
+ {
+ x = ((c * (a / d)) + b) / denom;
+ y = ((c * (b / d)) - a) / denom;
+ }
}
else
{
+ /* Prevent underflow when denominator is near max representable. */
+ if (FABS (c) >= RBIG)
+ {
+ a = a / 2;
+ b = b / 2;
+ c = c / 2;
+ d = d / 2;
+ }
+ /* Avoid overflow/underflow issues when both c and d are small.
+ Scaling up helps avoid some underflows.
+ No new overflow possible since both c&d are less than RMIN2. */
+ if (FABS (c) < RMIN2)
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ else
+ {
+ if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (c) < RMAX2))
+ || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+ && (FABS (c) < RMAX2)))
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ }
ratio = d / c;
denom = (d * ratio) + c;
- x = ((b * ratio) + a) / denom;
- y = (b - (a * ratio)) / denom;
+ /* Choose alternate order of computation if ratio is subnormal. */
+ if (FABS (ratio) > RMIN)
+ {
+ x = ((b * ratio) + a) / denom;
+ y = (b - (a * ratio)) / denom;
+ }
+ else
+ {
+ x = (a + (d * (b / c))) / denom;
+ y = (b - (d * (a / c))) / denom;
+ }
}
/* Recover infinities and zeros that computed as NaN+iNaN; the only cases
#if defined(L_mulhc3) || defined(L_divhc3)
# define MTYPE HFtype
# define CTYPE HCtype
+# define AMTYPE SFtype
# define MODE hc
# define CEXT __LIBGCC_HF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_HF_EXCESS_PRECISION__)
#elif defined(L_mulsc3) || defined(L_divsc3)
# define MTYPE SFtype
# define CTYPE SCtype
+# define AMTYPE DFtype
# define MODE sc
# define CEXT __LIBGCC_SF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_SF_EXCESS_PRECISION__)
+# define RBIG (__LIBGCC_SF_MAX__ / 2)
+# define RMIN (__LIBGCC_SF_MIN__)
+# define RMIN2 (__LIBGCC_SF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_SF_EPSILON__)
+# define RMAX2 (RBIG * RMIN2)
#elif defined(L_muldc3) || defined(L_divdc3)
# define MTYPE DFtype
# define CTYPE DCtype
# define MODE dc
# define CEXT __LIBGCC_DF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_DF_EXCESS_PRECISION__)
+# define RBIG (__LIBGCC_DF_MAX__ / 2)
+# define RMIN (__LIBGCC_DF_MIN__)
+# define RMIN2 (__LIBGCC_DF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_DF_EPSILON__)
+# define RMAX2 (RBIG * RMIN2)
#elif defined(L_mulxc3) || defined(L_divxc3)
# define MTYPE XFtype
# define CTYPE XCtype
# define MODE xc
# define CEXT __LIBGCC_XF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_XF_EXCESS_PRECISION__)
+# define RBIG (__LIBGCC_XF_MAX__ / 2)
+# define RMIN (__LIBGCC_XF_MIN__)
+# define RMIN2 (__LIBGCC_XF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_XF_EPSILON__)
+# define RMAX2 (RBIG * RMIN2)
#elif defined(L_multc3) || defined(L_divtc3)
# define MTYPE TFtype
# define CTYPE TCtype
# define MODE tc
# define CEXT __LIBGCC_TF_FUNC_EXT__
# define NOTRUNC (!__LIBGCC_TF_EXCESS_PRECISION__)
+# define RBIG (__LIBGCC_TF_MAX__ / 2)
+# define RMIN (__LIBGCC_TF_MIN__)
+# define RMIN2 (__LIBGCC_TF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_TF_EPSILON__)
+# define RMAX2 (RBIG * RMIN2)
#else
# error
#endif
CTYPE
CONCAT3(__div,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
{
+#if defined(L_divhc3) \
+ || (defined(L_divsc3) && defined(__LIBGCC_HAVE_HWDBL__) )
+
+ /* Half precision is handled with float precision.
+ float is handled with double precision when double precision
+ hardware is available.
+ Due to the additional precision, the simple complex divide
+ method (without Smith's method) is sufficient to get accurate
+ answers and runs slightly faster than Smith's method. */
+
+ AMTYPE aa, bb, cc, dd;
+ AMTYPE denom;
+ MTYPE x, y;
+ CTYPE res;
+ aa = a;
+ bb = b;
+ cc = c;
+ dd = d;
+
+ denom = (cc * cc) + (dd * dd);
+ x = ((aa * cc) + (bb * dd)) / denom;
+ y = ((bb * cc) - (aa * dd)) / denom;
+
+#else
MTYPE denom, ratio, x, y;
CTYPE res;
- /* ??? We can get better behavior from logarithmic scaling instead of
- the division. But that would mean starting to link libgcc against
- libm. We could implement something akin to ldexp/frexp as gcc builtins
- fairly easily... */
+ /* double, extended, long double have significant potential
+ underflow/overflow errors that can be greatly reduced with
+ a limited number of tests and adjustments. float is handled
+ the same way when no HW double is available.
+ */
+
+ /* Scale by max(c,d) to reduce chances of denominator overflowing. */
if (FABS (c) < FABS (d))
{
+ /* Prevent underflow when denominator is near max representable. */
+ if (FABS (d) >= RBIG)
+ {
+ a = a / 2;
+ b = b / 2;
+ c = c / 2;
+ d = d / 2;
+ }
+ /* Avoid overflow/underflow issues when c and d are small.
+ Scaling up helps avoid some underflows.
+ No new overflow possible since c&d < RMIN2. */
+ if (FABS (d) < RMIN2)
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ else
+ {
+ if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2))
+ || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+ && (FABS (d) < RMAX2)))
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ }
ratio = c / d;
denom = (c * ratio) + d;
- x = ((a * ratio) + b) / denom;
- y = ((b * ratio) - a) / denom;
+ /* Choose alternate order of computation if ratio is subnormal. */
+ if (FABS (ratio) > RMIN)
+ {
+ x = ((a * ratio) + b) / denom;
+ y = ((b * ratio) - a) / denom;
+ }
+ else
+ {
+ x = ((c * (a / d)) + b) / denom;
+ y = ((c * (b / d)) - a) / denom;
+ }
}
else
{
+ /* Prevent underflow when denominator is near max representable. */
+ if (FABS (c) >= RBIG)
+ {
+ a = a / 2;
+ b = b / 2;
+ c = c / 2;
+ d = d / 2;
+ }
+ /* Avoid overflow/underflow issues when both c and d are small.
+ Scaling up helps avoid some underflows.
+ No new overflow possible since both c&d are less than RMIN2. */
+ if (FABS (c) < RMIN2)
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ else
+ {
+ if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (c) < RMAX2))
+ || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+ && (FABS (c) < RMAX2)))
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ }
ratio = d / c;
denom = (d * ratio) + c;
- x = ((b * ratio) + a) / denom;
- y = (b - (a * ratio)) / denom;
+ /* Choose alternate order of computation if ratio is subnormal. */
+ if (FABS (ratio) > RMIN)
+ {
+ x = ((b * ratio) + a) / denom;
+ y = (b - (a * ratio)) / denom;
+ }
+ else
+ {
+ x = (a + (d * (b / c))) / denom;
+ y = (b - (d * (a / c))) / denom;
+ }
}
+#endif
- /* Recover infinities and zeros that computed as NaN+iNaN; the only cases
- are nonzero/zero, infinite/finite, and finite/infinite. */
+ /* Recover infinities and zeros that computed as NaN+iNaN; the only
+ cases are nonzero/zero, infinite/finite, and finite/infinite. */
if (isnan (x) && isnan (y))
{
if (c == 0.0 && d == 0.0 && (!isnan (a) || !isnan (b)))