From 82477c28f46c579a149a8333c07233e9f4e43408 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Tue, 6 Nov 2012 14:12:54 +0000 Subject: [PATCH] Fix fma underflows with small x * y (bug 14793). --- ChangeLog | 13 ++++ NEWS | 2 +- math/libm-test.inc | 128 ++++++++++++++++++++++++++++++++++++++ sysdeps/ieee754/dbl-64/s_fma.c | 45 ++++++++------ sysdeps/ieee754/ldbl-128/s_fmal.c | 45 ++++++++------ sysdeps/ieee754/ldbl-96/s_fmal.c | 45 ++++++++------ 6 files changed, 223 insertions(+), 55 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3ff6937..04ccde0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2012-11-06 Joseph Myers + + [BZ #14793] + * sysdeps/ieee754/dbl-64/s_fma.c (__fma): In case of large z + exponent and small x and y exponents, scale x or y up. Increase + by 2 the exponent used in scaling up. + * sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise. + * sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise. + * math/libm-test.inc (fma_test): Add more tests. + (fma_test_towardzero): Likewise. + (fma_test_downward): Likewise. + (fma_test_upward): Likewise. + 2012-11-05 Joseph Myers [BZ #14805] diff --git a/NEWS b/NEWS index fec122b..331d212 100644 --- a/NEWS +++ b/NEWS @@ -18,7 +18,7 @@ Version 2.17 14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545, 14557, 14562, 14568, 14576, 14579, 14583, 14587, 14595, 14602, 14610, 14621, 14638, 14645, 14648, 14652, 14660, 14661, 14669, 14683, 14694, 14716, 14743, - 14767, 14783, 14784, 14785, 14796, 14797, 14801, 14805. + 14767, 14783, 14784, 14785, 14793, 14796, 14797, 14801, 14805. * Support for STT_GNU_IFUNC symbols added for s390 and s390x. Optimized versions of memcpy, memset, and memcmp added for System z10 and diff --git a/math/libm-test.inc b/math/libm-test.inc index 55892c3..a52ce6a 100644 --- a/math/libm-test.inc +++ b/math/libm-test.inc @@ -4662,6 +4662,14 @@ fma_test (void) TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1p103); #endif #if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53 TEST_fff_f (fma, 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13); @@ -4712,6 +4720,14 @@ fma_test (void) TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1p970); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64 TEST_fff_f (fma, -0x8.03fcp+3696L, 0xf.fffffffffffffffp-6140L, 0x8.3ffffffffffffffp-2450L, -0x8.01ecp-2440L); @@ -4748,6 +4764,14 @@ fma_test (void) TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113 TEST_fff_f (fma, 0x1.bb2de33e02ccbbfa6e245a7c1f71p-2584L, -0x1.6b500daf0580d987f1bc0cadfcddp-13777L, 0x1.613cd91d9fed34b33820e5ab9d8dp-16378L, -0x1.3a79fb50eb9ce887cffa0f09bd9fp-16360L); @@ -4791,6 +4815,14 @@ fma_test (void) TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L); #endif END (fma); @@ -4884,6 +4916,14 @@ fma_test_towardzero (void) TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x0.ffffffp127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x0.ffffffp127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x0.ffffffp103); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x0.ffffffp103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1p103); #endif #if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53 TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION); @@ -4914,6 +4954,14 @@ fma_test_towardzero (void) TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x0.fffffffffffff8p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x0.fffffffffffff8p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x0.fffffffffffff8p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x0.fffffffffffff8p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1p970); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64 TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION); @@ -4944,6 +4992,14 @@ fma_test_towardzero (void) TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x0.ffffffffffffffffp16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x0.ffffffffffffffffp16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x0.ffffffffffffffffp16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x0.ffffffffffffffffp16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113 TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION); @@ -4974,6 +5030,14 @@ fma_test_towardzero (void) TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x0.ffffffffffffffffffffffffffff8p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x0.ffffffffffffffffffffffffffff8p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x0.ffffffffffffffffffffffffffff8p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x0.ffffffffffffffffffffffffffff8p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L); #endif } @@ -5070,6 +5134,14 @@ fma_test_downward (void) TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x0.ffffffp127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1.000002p127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x0.ffffffp103); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1.000002p103); #endif #if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53 TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000002p-1023, UNDERFLOW_EXCEPTION); @@ -5100,6 +5172,14 @@ fma_test_downward (void) TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x0.fffffffffffff8p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1.0000000000001p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x0.fffffffffffff8p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1.0000000000001p970); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64 TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000004p-16383L, UNDERFLOW_EXCEPTION); @@ -5130,6 +5210,14 @@ fma_test_downward (void) TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x0.ffffffffffffffffp16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1.0000000000000002p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x0.ffffffffffffffffp16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1.0000000000000002p16319L); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113 TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000002p-16383L, UNDERFLOW_EXCEPTION); @@ -5160,6 +5248,14 @@ fma_test_downward (void) TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x0.ffffffffffffffffffffffffffff8p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1.0000000000000000000000000001p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x0.ffffffffffffffffffffffffffff8p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1.0000000000000000000000000001p16319L); #endif } @@ -5256,6 +5352,14 @@ fma_test_upward (void) TEST_fff_f (fma, 0x0.fffp0, -0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, 0x0.fffp0, 0x0.ffep0, -0x1p-24); TEST_fff_f (fma, -0x0.fffp0, -0x0.fffp0, -0x0.ffep0, 0x1p-24); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p127, 0x1.000002p127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p127, 0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p127, -0x0.ffffffp127); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p127, -0x1p127); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, 0x1p103, 0x1.000002p103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, 0x1p103, 0x1p103); + TEST_fff_f (fma, 0x1.000002p-126, 0x1.000002p-26, -0x1p103, -0x0.ffffffp103); + TEST_fff_f (fma, 0x1.000002p-126, -0x1.000002p-26, -0x1p103, -0x1p103); #endif #if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53 TEST_fff_f (fma, 0x1.4p-1022, 0x1.0000000000002p-1, 0x1p-1024, 0x1.c000000000004p-1023, UNDERFLOW_EXCEPTION); @@ -5286,6 +5390,14 @@ fma_test_upward (void) TEST_fff_f (fma, 0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, 0x0.fffffffffffff8p0, 0x0.fffffffffffffp0, -0x1p-106); TEST_fff_f (fma, -0x0.fffffffffffff8p0, -0x0.fffffffffffff8p0, -0x0.fffffffffffffp0, 0x1p-106); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p1023, 0x1.0000000000001p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p1023, 0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p1023, -0x0.fffffffffffff8p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p1023, -0x1p1023); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, 0x1p970, 0x1.0000000000001p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, 0x1p970, 0x1p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, 0x1.0000000000001p-55, -0x1p970, -0x0.fffffffffffff8p970); + TEST_fff_f (fma, 0x1.0000000000001p-1022, -0x1.0000000000001p-55, -0x1p970, -0x1p970); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 64 TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000004p-1L, 0x1p-16384L, 0x1.c000000000000008p-16383L, UNDERFLOW_EXCEPTION); @@ -5316,6 +5428,14 @@ fma_test_upward (void) TEST_fff_f (fma, 0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, 0x0.ffffffffffffffffp0L, 0x0.fffffffffffffffep0L, -0x1p-128L); TEST_fff_f (fma, -0x0.ffffffffffffffffp0L, -0x0.ffffffffffffffffp0L, -0x0.fffffffffffffffep0L, 0x1p-128L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16383L, 0x1.0000000000000002p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16383L, -0x0.ffffffffffffffffp16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, 0x1p16319L, 0x1.0000000000000002p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, 0x1.0000000000000002p-66L, -0x1p16319L, -0x0.ffffffffffffffffp16319L); + TEST_fff_f (fma, 0x1.0000000000000002p-16382L, -0x1.0000000000000002p-66L, -0x1p16319L, -0x1p16319L); #endif #if defined (TEST_LDOUBLE) && LDBL_MANT_DIG == 113 TEST_fff_f (fma, 0x1.4p-16382L, 0x1.0000000000000000000000000002p-1L, 0x1p-16384L, 0x1.c000000000000000000000000004p-16383L, UNDERFLOW_EXCEPTION); @@ -5346,6 +5466,14 @@ fma_test_upward (void) TEST_fff_f (fma, 0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffff8p0L, 0x0.ffffffffffffffffffffffffffffp0L, -0x1p-226L); TEST_fff_f (fma, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffff8p0L, -0x0.ffffffffffffffffffffffffffffp0L, 0x1p-226L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1.0000000000000000000000000001p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16383L, 0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x0.ffffffffffffffffffffffffffff8p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16383L, -0x1p16383L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1.0000000000000000000000000001p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, 0x1p16319L, 0x1p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, 0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x0.ffffffffffffffffffffffffffff8p16319L); + TEST_fff_f (fma, 0x1.0000000000000000000000000001p-16382L, -0x1.0000000000000000000000000001p-66L, -0x1p16319L, -0x1p16319L); #endif } diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c index cd28830..8c69b98 100644 --- a/sysdeps/ieee754/dbl-64/s_fma.c +++ b/sysdeps/ieee754/dbl-64/s_fma.c @@ -114,8 +114,17 @@ __fma (double x, double y, double z) { /* Similarly. If z exponent is very large and x and y exponents are - very small, it doesn't matter if we don't adjust it. */ - if (u.ieee.exponent > v.ieee.exponent) + very small, adjust them up to avoid spurious underflows, + rather than down. */ + if (u.ieee.exponent + v.ieee.exponent + <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) + { + if (u.ieee.exponent > v.ieee.exponent) + u.ieee.exponent += 2 * DBL_MANT_DIG + 2; + else + v.ieee.exponent += 2 * DBL_MANT_DIG + 2; + } + else if (u.ieee.exponent > v.ieee.exponent) { if (u.ieee.exponent > DBL_MANT_DIG) u.ieee.exponent -= DBL_MANT_DIG; @@ -145,15 +154,15 @@ __fma (double x, double y, double z) <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */ { if (u.ieee.exponent > v.ieee.exponent) - u.ieee.exponent += 2 * DBL_MANT_DIG; + u.ieee.exponent += 2 * DBL_MANT_DIG + 2; else - v.ieee.exponent += 2 * DBL_MANT_DIG; - if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4) + v.ieee.exponent += 2 * DBL_MANT_DIG + 2; + if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 6) { if (w.ieee.exponent) - w.ieee.exponent += 2 * DBL_MANT_DIG; + w.ieee.exponent += 2 * DBL_MANT_DIG + 2; else - w.d *= 0x1p106; + w.d *= 0x1p108; adjust = -1; } /* Otherwise x * y should just affect inexact @@ -238,19 +247,19 @@ __fma (double x, double y, double z) /* If a1 + u.d is exact, the only rounding happens during scaling down. */ if (j == 0) - return v.d * 0x1p-106; + return v.d * 0x1p-108; /* If result rounded to zero is not subnormal, no double rounding will occur. */ - if (v.ieee.exponent > 106) - return (a1 + u.d) * 0x1p-106; - /* If v.d * 0x1p-106 with round to zero is a subnormal above - or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa + if (v.ieee.exponent > 108) + return (a1 + u.d) * 0x1p-108; + /* If v.d * 0x1p-108 with round to zero is a subnormal above + or equal to DBL_MIN / 2, then v.d * 0x1p-108 shifts mantissa down just by 1 bit, which means v.ieee.mantissa1 |= j would change the round bit, not sticky or guard bit. - v.d * 0x1p-106 never normalizes by shifting up, + v.d * 0x1p-108 never normalizes by shifting up, so round bit plus sticky bit should be already enough for proper rounding. */ - if (v.ieee.exponent == 106) + if (v.ieee.exponent == 108) { /* If the exponent would be in the normal range when rounding to normal precision with unbounded exponent @@ -260,8 +269,8 @@ __fma (double x, double y, double z) if (TININESS_AFTER_ROUNDING) { w.d = a1 + u.d; - if (w.ieee.exponent == 107) - return w.d * 0x1p-106; + if (w.ieee.exponent == 109) + return w.d * 0x1p-108; } /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, v.ieee.mantissa1 & 1 is the round bit and j is our sticky @@ -270,12 +279,12 @@ __fma (double x, double y, double z) w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j; w.ieee.negative = v.ieee.negative; v.ieee.mantissa1 &= ~3U; - v.d *= 0x1p-106; + v.d *= 0x1p-108; w.d *= 0x1p-2; return v.d + w.d; } v.ieee.mantissa1 |= j; - return v.d * 0x1p-106; + return v.d * 0x1p-108; } } #ifndef __fma diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c index 6fa663a..c9accad 100644 --- a/sysdeps/ieee754/ldbl-128/s_fmal.c +++ b/sysdeps/ieee754/ldbl-128/s_fmal.c @@ -118,8 +118,17 @@ __fmal (long double x, long double y, long double z) { /* Similarly. If z exponent is very large and x and y exponents are - very small, it doesn't matter if we don't adjust it. */ - if (u.ieee.exponent > v.ieee.exponent) + very small, adjust them up to avoid spurious underflows, + rather than down. */ + if (u.ieee.exponent + v.ieee.exponent + <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) + { + if (u.ieee.exponent > v.ieee.exponent) + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + else + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + } + else if (u.ieee.exponent > v.ieee.exponent) { if (u.ieee.exponent > LDBL_MANT_DIG) u.ieee.exponent -= LDBL_MANT_DIG; @@ -149,15 +158,15 @@ __fmal (long double x, long double y, long double z) <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */ { if (u.ieee.exponent > v.ieee.exponent) - u.ieee.exponent += 2 * LDBL_MANT_DIG; + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - v.ieee.exponent += 2 * LDBL_MANT_DIG; - if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4) + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6) { if (w.ieee.exponent) - w.ieee.exponent += 2 * LDBL_MANT_DIG; + w.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - w.d *= 0x1p226L; + w.d *= 0x1p228L; adjust = -1; } /* Otherwise x * y should just affect inexact @@ -242,19 +251,19 @@ __fmal (long double x, long double y, long double z) /* If a1 + u.d is exact, the only rounding happens during scaling down. */ if (j == 0) - return v.d * 0x1p-226L; + return v.d * 0x1p-228L; /* If result rounded to zero is not subnormal, no double rounding will occur. */ - if (v.ieee.exponent > 226) - return (a1 + u.d) * 0x1p-226L; - /* If v.d * 0x1p-226L with round to zero is a subnormal above - or equal to LDBL_MIN / 2, then v.d * 0x1p-226L shifts mantissa + if (v.ieee.exponent > 228) + return (a1 + u.d) * 0x1p-228L; + /* If v.d * 0x1p-228L with round to zero is a subnormal above + or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa down just by 1 bit, which means v.ieee.mantissa3 |= j would change the round bit, not sticky or guard bit. - v.d * 0x1p-226L never normalizes by shifting up, + v.d * 0x1p-228L never normalizes by shifting up, so round bit plus sticky bit should be already enough for proper rounding. */ - if (v.ieee.exponent == 226) + if (v.ieee.exponent == 228) { /* If the exponent would be in the normal range when rounding to normal precision with unbounded exponent @@ -264,8 +273,8 @@ __fmal (long double x, long double y, long double z) if (TININESS_AFTER_ROUNDING) { w.d = a1 + u.d; - if (w.ieee.exponent == 227) - return w.d * 0x1p-226L; + if (w.ieee.exponent == 229) + return w.d * 0x1p-228L; } /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding, v.ieee.mantissa3 & 1 is the round bit and j is our sticky @@ -274,12 +283,12 @@ __fmal (long double x, long double y, long double z) w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j; w.ieee.negative = v.ieee.negative; v.ieee.mantissa3 &= ~3U; - v.d *= 0x1p-226L; + v.d *= 0x1p-228L; w.d *= 0x1p-2L; return v.d + w.d; } v.ieee.mantissa3 |= j; - return v.d * 0x1p-226L; + return v.d * 0x1p-228L; } } weak_alias (__fmal, fmal) diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c index 53098b6..c86dff6 100644 --- a/sysdeps/ieee754/ldbl-96/s_fmal.c +++ b/sysdeps/ieee754/ldbl-96/s_fmal.c @@ -116,8 +116,17 @@ __fmal (long double x, long double y, long double z) { /* Similarly. If z exponent is very large and x and y exponents are - very small, it doesn't matter if we don't adjust it. */ - if (u.ieee.exponent > v.ieee.exponent) + very small, adjust them up to avoid spurious underflows, + rather than down. */ + if (u.ieee.exponent + v.ieee.exponent + <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) + { + if (u.ieee.exponent > v.ieee.exponent) + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + else + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + } + else if (u.ieee.exponent > v.ieee.exponent) { if (u.ieee.exponent > LDBL_MANT_DIG) u.ieee.exponent -= LDBL_MANT_DIG; @@ -147,15 +156,15 @@ __fmal (long double x, long double y, long double z) <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */ { if (u.ieee.exponent > v.ieee.exponent) - u.ieee.exponent += 2 * LDBL_MANT_DIG; + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - v.ieee.exponent += 2 * LDBL_MANT_DIG; - if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4) + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6) { if (w.ieee.exponent) - w.ieee.exponent += 2 * LDBL_MANT_DIG; + w.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - w.d *= 0x1p128L; + w.d *= 0x1p130L; adjust = -1; } /* Otherwise x * y should just affect inexact @@ -240,19 +249,19 @@ __fmal (long double x, long double y, long double z) /* If a1 + u.d is exact, the only rounding happens during scaling down. */ if (j == 0) - return v.d * 0x1p-128L; + return v.d * 0x1p-130L; /* If result rounded to zero is not subnormal, no double rounding will occur. */ - if (v.ieee.exponent > 128) - return (a1 + u.d) * 0x1p-128L; - /* If v.d * 0x1p-128L with round to zero is a subnormal above - or equal to LDBL_MIN / 2, then v.d * 0x1p-128L shifts mantissa + if (v.ieee.exponent > 130) + return (a1 + u.d) * 0x1p-130L; + /* If v.d * 0x1p-130L with round to zero is a subnormal above + or equal to LDBL_MIN / 2, then v.d * 0x1p-130L shifts mantissa down just by 1 bit, which means v.ieee.mantissa1 |= j would change the round bit, not sticky or guard bit. - v.d * 0x1p-128L never normalizes by shifting up, + v.d * 0x1p-130L never normalizes by shifting up, so round bit plus sticky bit should be already enough for proper rounding. */ - if (v.ieee.exponent == 128) + if (v.ieee.exponent == 130) { /* If the exponent would be in the normal range when rounding to normal precision with unbounded exponent @@ -262,8 +271,8 @@ __fmal (long double x, long double y, long double z) if (TININESS_AFTER_ROUNDING) { w.d = a1 + u.d; - if (w.ieee.exponent == 129) - return w.d * 0x1p-128L; + if (w.ieee.exponent == 131) + return w.d * 0x1p-130L; } /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, v.ieee.mantissa1 & 1 is the round bit and j is our sticky @@ -272,12 +281,12 @@ __fmal (long double x, long double y, long double z) w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j; w.ieee.negative = v.ieee.negative; v.ieee.mantissa1 &= ~3U; - v.d *= 0x1p-128L; + v.d *= 0x1p-130L; w.d *= 0x1p-2L; return v.d + w.d; } v.ieee.mantissa1 |= j; - return v.d * 0x1p-128L; + return v.d * 0x1p-130L; } } weak_alias (__fmal, fmal) -- 2.7.4