From 8627a2329c5e6fc09bf9f0f070a21aca41f2b122 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Tue, 30 Oct 2012 13:54:50 +0000 Subject: [PATCH] Fix fma missing underflows and bad results for some subnormal results (bugs 14152, 14783). --- ChangeLog | 13 ++++++++ NEWS | 14 ++++----- math/libm-test.inc | 65 ++++++++++++++++++++++++++++++++++++--- sysdeps/ieee754/dbl-64/s_fma.c | 22 +++++-------- sysdeps/ieee754/ldbl-128/s_fmal.c | 22 +++++-------- sysdeps/ieee754/ldbl-96/s_fmal.c | 22 +++++-------- 6 files changed, 105 insertions(+), 53 deletions(-) diff --git a/ChangeLog b/ChangeLog index 067337a..912167b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,18 @@ 2012-10-30 Joseph Myers + [BZ #14152] + [BZ #14783] + * sysdeps/ieee754/dbl-64/s_fma.c (__fma): Extract low bits of + result and shift together with sticky bit instead of replicating + round-to-nearest rounding. + * sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise. + * sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise. + * math/libm-test.inc (fma_test): Add more tests. Do not permit + missing underflow exceptions. + (fma_test_towardzero): Add more tests. + (fma_test_downward): Likewise. + (fma_test_upward): Likewise. + [BZ #14047] * sysdeps/generic/tininess.h: New file. * sysdeps/i386/tininess.h: Likewise. diff --git a/NEWS b/NEWS index e4e6726..b54365f 100644 --- a/NEWS +++ b/NEWS @@ -11,13 +11,13 @@ Version 2.17 1349, 3479, 5044, 5298, 5400, 6530, 6778, 6808, 9685, 9914, 10014, 10038, 10631, 11438, 11607, 12140, 13412, 13542, 13601, 13629, 13679, 13696, - 13717, 13741, 13939, 13966, 14042, 14047, 14090, 14150, 14151, 14154, - 14157, 14166, 14173, 14195, 14237, 14251, 14252, 14283, 14298, 14303, - 14307, 14328, 14331, 14336, 14337, 14347, 14349, 14376, 14417, 14459, - 14476, 14477, 14505, 14510, 14516, 14518, 14519, 14530, 14532, 14538, - 14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583, 14587, - 14602, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14683, 14694, - 14716, 14743, 14767. + 13717, 13741, 13939, 13966, 14042, 14047, 14090, 14150, 14151, 14152, + 14154, 14157, 14166, 14173, 14195, 14237, 14251, 14252, 14283, 14298, + 14303, 14307, 14328, 14331, 14336, 14337, 14347, 14349, 14376, 14417, + 14459, 14476, 14477, 14505, 14510, 14516, 14518, 14519, 14530, 14532, + 14538, 14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583, + 14587, 14602, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14683, + 14694, 14716, 14743, 14767, 14783. * Support for STT_GNU_IFUNC symbols added for s390 and s390x. Optimized versions of memcpy, memset, and memcmp added for System z10 and diff --git a/math/libm-test.inc b/math/libm-test.inc index e828fc2..1e067fe 100644 --- a/math/libm-test.inc +++ b/math/libm-test.inc @@ -4616,6 +4616,8 @@ fma_test (void) TEST_fff_f (fma, 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115); TEST_fff_f (fma, -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127); TEST_fff_f (fma, 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127); + TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION); #endif #if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53 TEST_fff_f (fma, 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13); @@ -4635,10 +4637,11 @@ fma_test (void) TEST_fff_f (fma, 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022, UNDERFLOW_EXCEPTION); TEST_fff_f (fma, 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022, UNDERFLOW_EXCEPTION); TEST_fff_f (fma, -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022, UNDERFLOW_EXCEPTION); - /* Sometimes the FE_UNDERFLOW is not set, so be prepared. See Bug 14152. */ - TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022, UNDERFLOW_EXCEPTION_OK); + TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022, UNDERFLOW_EXCEPTION); TEST_fff_f (fma, 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022, UNDERFLOW_EXCEPTION); TEST_fff_f (fma, -0x1.fffffffffffffp-711, 0x1.fffffffffffffp-275, 0x1.fffffe00007ffp-983, 0x1.7ffffe00007ffp-983); + TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64 TEST_fff_f (fma, -0x8.03fcp+3696L, 0xf.fffffffffffffffp-6140L, 0x8.3ffffffffffffffp-2450L, -0x8.01ecp-2440L); @@ -4646,8 +4649,9 @@ fma_test (void) TEST_fff_f (fma, 0xc.7fc000003ffffffp-1194L, 0x8.1e0003fffffffffp+15327L, -0x8.fffep+14072L, 0xc.ae9f164020effffp+14136L); TEST_fff_f (fma, -0x8.0001fc000000003p+1798L, 0xcp-2230L, 0x8.f7e000000000007p-468L, -0xc.0002f9ffee10404p-429L); TEST_fff_f (fma, 0xc.0000000000007ffp+10130L, -0x8.000000000000001p+4430L, 0xc.07000000001ffffp+14513L, -0xb.fffffffffffd7e4p+14563L); - /* Bug 14152: underflow exception may be missing. */ - TEST_fff_f (fma, 0xb.ffffp-4777L, 0x8.000000fffffffffp-11612L, -0x0.3800fff8p-16385L, 0x5.c7fe80c7ffeffffp-16385L, UNDERFLOW_EXCEPTION_OK); + TEST_fff_f (fma, 0xb.ffffp-4777L, 0x8.000000fffffffffp-11612L, -0x0.3800fff8p-16385L, 0x5.c7fe80c7ffeffffp-16385L, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113 TEST_fff_f (fma, 0x1.bb2de33e02ccbbfa6e245a7c1f71p-2584L, -0x1.6b500daf0580d987f1bc0cadfcddp-13777L, 0x1.613cd91d9fed34b33820e5ab9d8dp-16378L, -0x1.3a79fb50eb9ce887cffa0f09bd9fp-16360L); @@ -4663,6 +4667,8 @@ fma_test (void) TEST_fff_f (fma, 0x1.00000000000007ffffffffffffffp-9045L, -0x1.ffffffffffff80000001ffffffffp+4773L, -0x1.f8p-4316L, -0x1.00000000000f88000000fffffdffp-4271L); TEST_fff_f (fma, 0x1.4e922764c90701d4a2f21d01893dp-8683L, -0x1.955a12e2d7c9447c27fa022fc865p+212L, -0x1.e9634462eaef96528b90b6944578p-8521L, -0x1.08e1783184a371943d3598e10865p-8470L); TEST_fff_f (fma, 0x1.801181509c03bdbef10d6165588cp-15131L, 0x1.ad86f8e57d3d40bfa8007780af63p-368L, -0x1.6e9df0dab1c9f1d7a6043c390741p-15507L, 0x1.417c9b2b15e2ad57dc9e0e920844p-15498L); + TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION); #endif END (fma); @@ -4717,6 +4723,23 @@ fma_test_towardzero (void) TEST_fff_f (fma, -min_value, min_value, minus_zero, minus_zero, UNDERFLOW_EXCEPTION); TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION); TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION); + +#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24 + TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION); +#endif +#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53 + TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION); +#endif +#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64 + TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION); +#endif +#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113 + TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION); +#endif } fesetround (save_round_mode); @@ -4773,6 +4796,23 @@ fma_test_downward (void) TEST_fff_f (fma, -min_value, min_value, minus_zero, -min_subnorm_value, UNDERFLOW_EXCEPTION); TEST_fff_f (fma, -min_value, -min_value, plus_zero, plus_zero, UNDERFLOW_EXCEPTION); TEST_fff_f (fma, -min_value, -min_value, minus_zero, plus_zero, UNDERFLOW_EXCEPTION); + +#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24 + TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00004p-127, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00008p-127, UNDERFLOW_EXCEPTION); +#endif +#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53 + TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION); +#endif +#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64 + TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION); +#endif +#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113 + TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION); +#endif } fesetround (save_round_mode); @@ -4829,6 +4869,23 @@ fma_test_upward (void) TEST_fff_f (fma, -min_value, min_value, minus_zero, minus_zero, UNDERFLOW_EXCEPTION); TEST_fff_f (fma, -min_value, -min_value, plus_zero, min_subnorm_value, UNDERFLOW_EXCEPTION); TEST_fff_f (fma, -min_value, -min_value, minus_zero, min_subnorm_value, UNDERFLOW_EXCEPTION); + +#if defined (TEST_FLOAT) && FLT_MANT_DIG == 24 + TEST_fff_f (fma, 0x1.4p-126, 0x1.000004p-1, 0x1p-128, 0x1.c00008p-127, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-126, 0x1.000004p-1, -0x1p-128, -0x1.c00004p-127, UNDERFLOW_EXCEPTION); +#endif +#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53 + TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-1022, 0x1.0000000000002p-1, -0x1p-1024, -0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION); +#endif +#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64 + TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000004p-1L, -0x1p-16384L, -0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION); +#endif +#if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113 + TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION); + TEST_fff_f (fma, -0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, -0x1p-16384L, -0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION); +#endif } fesetround (save_round_mode); diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c index 5e21461..108fd66 100644 --- a/sysdeps/ieee754/dbl-64/s_fma.c +++ b/sysdeps/ieee754/dbl-64/s_fma.c @@ -210,20 +210,14 @@ __fma (double x, double y, double z) { /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, v.ieee.mantissa1 & 1 is the round bit and j is our sticky - bit. In round-to-nearest 001 rounds down like 00, - 011 rounds up, even though 01 rounds down (thus we need - to adjust), 101 rounds down like 10 and 111 rounds up - like 11. */ - if ((v.ieee.mantissa1 & 3) == 1) - { - v.d *= 0x1p-106; - if (v.ieee.negative) - return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */; - else - return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */; - } - else - return v.d * 0x1p-106; + bit. */ + w.d = 0.0; + w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j; + w.ieee.negative = v.ieee.negative; + v.ieee.mantissa1 &= ~3U; + v.d *= 0x1p-106; + w.d *= 0x1p-2; + return v.d + w.d; } v.ieee.mantissa1 |= j; return v.d * 0x1p-106; diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c index 46b3d81..7fb9856 100644 --- a/sysdeps/ieee754/ldbl-128/s_fmal.c +++ b/sysdeps/ieee754/ldbl-128/s_fmal.c @@ -211,20 +211,14 @@ __fmal (long double x, long double y, long double z) { /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding, v.ieee.mantissa3 & 1 is the round bit and j is our sticky - bit. In round-to-nearest 001 rounds down like 00, - 011 rounds up, even though 01 rounds down (thus we need - to adjust), 101 rounds down like 10 and 111 rounds up - like 11. */ - if ((v.ieee.mantissa3 & 3) == 1) - { - v.d *= 0x1p-226L; - if (v.ieee.negative) - return v.d - 0x1p-16494L /* __LDBL_DENORM_MIN__ */; - else - return v.d + 0x1p-16494L /* __LDBL_DENORM_MIN__ */; - } - else - return v.d * 0x1p-226L; + bit. */ + w.d = 0.0L; + w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j; + w.ieee.negative = v.ieee.negative; + v.ieee.mantissa3 &= ~3U; + v.d *= 0x1p-226L; + w.d *= 0x1p-2L; + return v.d + w.d; } v.ieee.mantissa3 |= j; return v.d * 0x1p-226L; diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c index d125124..7310491 100644 --- a/sysdeps/ieee754/ldbl-96/s_fmal.c +++ b/sysdeps/ieee754/ldbl-96/s_fmal.c @@ -211,20 +211,14 @@ __fmal (long double x, long double y, long double z) { /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, v.ieee.mantissa1 & 1 is the round bit and j is our sticky - bit. In round-to-nearest 001 rounds down like 00, - 011 rounds up, even though 01 rounds down (thus we need - to adjust), 101 rounds down like 10 and 111 rounds up - like 11. */ - if ((v.ieee.mantissa1 & 3) == 1) - { - v.d *= 0x1p-128L; - if (v.ieee.negative) - return v.d - 0x1p-16445L /* __LDBL_DENORM_MIN__ */; - else - return v.d + 0x1p-16445L /* __LDBL_DENORM_MIN__ */; - } - else - return v.d * 0x1p-128L; + bit. */ + w.d = 0.0L; + w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j; + w.ieee.negative = v.ieee.negative; + v.ieee.mantissa1 &= ~3U; + v.d *= 0x1p-128L; + w.d *= 0x1p-2L; + return v.d + w.d; } v.ieee.mantissa1 |= j; return v.d * 0x1p-128L; -- 2.7.4