Fix spurious underflows from pow with results close to 1 (bug 14811).

author Joseph Myers <joseph@codesourcery.com>

Wed, 7 Nov 2012 13:03:31 +0000 (13:03 +0000)

committer Joseph Myers <joseph@codesourcery.com>

Wed, 7 Nov 2012 13:03:31 +0000 (13:03 +0000)
author Joseph Myers <joseph@codesourcery.com>
Wed, 7 Nov 2012 13:03:31 +0000 (13:03 +0000)
committer Joseph Myers <joseph@codesourcery.com>
Wed, 7 Nov 2012 13:03:31 +0000 (13:03 +0000)
diff --git a/ChangeLog b/ChangeLog

index f811967..aeebbc4 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2012-11-07  Joseph Myers  <joseph@codesourcery.com>
+
+       [BZ #14811]
+       * sysdeps/i386/fpu/e_powl.S (pm79): New object.
+       (__ieee754_powl): Saturate nonzero exponents with absolute value
+       below 0x1p-79 to +/- 0x1p-79.
+       * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Saturate nonzero
+       exponents with absolute value below 0x1p-64 to +/- 0x1p-64.
+       * sysdeps/ieee754/flt-32/e_powf.c (__ieee754_powf): Saturate
+       nonzero exponents with absolute value below 0x1p-32 to +/-
+       0x1p-32.
+       * sysdeps/x86_64/fpu/e_powl.S (pm79): New object.
+       (__ieee754_powl): Saturate nonzero exponents with absolute value
+       below 0x1p-79 to +/- 0x1p-79.
+       * math/libm-test.inc (pow_test): Add more tests.
+
  2012-11-07  Andreas Krebbel  <Andreas.Krebbel@de.ibm.com>
  
         * sysdeps/s390/dl-procinfo.c (_dl_s390_cap_flags): Sync
diff --git a/NEWS b/NEWS

index 331d212..dbe5e31 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -18,7 +18,7 @@ Version 2.17
    14518, 14519, 14530, 14532, 14538, 14543, 14544, 14545, 14557, 14562,
    14568, 14576, 14579, 14583, 14587, 14595, 14602, 14610, 14621, 14638,
    14645, 14648, 14652, 14660, 14661, 14669, 14683, 14694, 14716, 14743,
-  14767, 14783, 14784, 14785, 14793, 14796, 14797, 14801, 14805.
+  14767, 14783, 14784, 14785, 14793, 14796, 14797, 14801, 14805, 14811.
  
  * Support for STT_GNU_IFUNC symbols added for s390 and s390x.
    Optimized versions of memcpy, memset, and memcmp added for System z10 and
diff --git a/math/libm-test.inc b/math/libm-test.inc

index a52ce6a..74488e7 100644 (file)
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -7743,10 +7743,12 @@ pow_test (void)
    TEST_ff_f (pow, plus_infty, 1e-7L, plus_infty);
    TEST_ff_f (pow, plus_infty, 1, plus_infty);
    TEST_ff_f (pow, plus_infty, 1e7L, plus_infty);
+  TEST_ff_f (pow, plus_infty, min_subnorm_value, plus_infty);
  
    TEST_ff_f (pow, plus_infty, -1e-7L, 0);
    TEST_ff_f (pow, plus_infty, -1, 0);
    TEST_ff_f (pow, plus_infty, -1e7L, 0);
+  TEST_ff_f (pow, plus_infty, -min_subnorm_value, 0);
  
    TEST_ff_f (pow, minus_infty, 1, minus_infty);
    TEST_ff_f (pow, minus_infty, 11, minus_infty);
@@ -7759,6 +7761,7 @@ pow_test (void)
    TEST_ff_f (pow, minus_infty, 1.1L, plus_infty);
    TEST_ff_f (pow, minus_infty, 11.1L, plus_infty);
    TEST_ff_f (pow, minus_infty, 1001.1L, plus_infty);
+  TEST_ff_f (pow, minus_infty, min_subnorm_value, plus_infty);
  
    TEST_ff_f (pow, minus_infty, -1, minus_zero);
    TEST_ff_f (pow, minus_infty, -11, minus_zero);
@@ -7771,6 +7774,7 @@ pow_test (void)
    TEST_ff_f (pow, minus_infty, -1.1L, 0);
    TEST_ff_f (pow, minus_infty, -11.1L, 0);
    TEST_ff_f (pow, minus_infty, -1001.1L, 0);
+  TEST_ff_f (pow, minus_infty, -min_subnorm_value, 0);
  #endif
  
    TEST_ff_f (pow, nan_value, nan_value, nan_value);
@@ -7793,6 +7797,8 @@ pow_test (void)
    TEST_ff_f (pow, nan_value, minus_infty, nan_value);
    TEST_ff_f (pow, nan_value, 2.5, nan_value);
    TEST_ff_f (pow, nan_value, -2.5, nan_value);
+  TEST_ff_f (pow, nan_value, min_subnorm_value, nan_value);
+  TEST_ff_f (pow, nan_value, -min_subnorm_value, nan_value);
  
    TEST_ff_f (pow, 1, plus_infty, 1);
    TEST_ff_f (pow, -1, plus_infty, 1);
@@ -7806,6 +7812,8 @@ pow_test (void)
    TEST_ff_f (pow, 1, 0x1p63L, 1);
    TEST_ff_f (pow, 1, 0x1p64L, 1);
    TEST_ff_f (pow, 1, 0x1p72L, 1);
+  TEST_ff_f (pow, 1, min_subnorm_value, 1);
+  TEST_ff_f (pow, 1, -min_subnorm_value, 1);
  
    /* pow (x, +-0) == 1.  */
    TEST_ff_f (pow, plus_infty, 0, 1);
@@ -7825,6 +7833,10 @@ pow_test (void)
    TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION);
    TEST_ff_f (pow, -10.1L, 1.1L, nan_value, INVALID_EXCEPTION);
    TEST_ff_f (pow, -10.1L, -1.1L, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.01L, min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.01L, -min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.0L, min_subnorm_value, nan_value, INVALID_EXCEPTION);
+  TEST_ff_f (pow, -1.0L, -min_subnorm_value, nan_value, INVALID_EXCEPTION);
  
    errno = 0;
    TEST_ff_f (pow, 0, -1, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
@@ -7910,6 +7922,9 @@ pow_test (void)
    TEST_ff_f (pow, 0, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
    check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
    errno = 0;
+  TEST_ff_f (pow, 0, -min_subnorm_value, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+  check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
+  errno = 0;
    TEST_ff_f (pow, 0, -0x1p24, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
    check_int ("errno for pow(0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
    errno = 0;
@@ -7925,6 +7940,9 @@ pow_test (void)
    TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
    check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
    errno = 0;
+  TEST_ff_f (pow, minus_zero, -min_subnorm_value, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+  check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
+  errno = 0;
    TEST_ff_f (pow, minus_zero, -0x1p24, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
    check_int ("errno for pow(-0,-num) == ERANGE", errno, ERANGE, 0, 0, 0);
    errno = 0;
@@ -8114,12 +8132,14 @@ pow_test (void)
    TEST_ff_f (pow, 0.0, 0x1p24, 0.0);
    TEST_ff_f (pow, 0.0, 0x1p127, 0.0);
    TEST_ff_f (pow, 0.0, max_value, 0.0);
+  TEST_ff_f (pow, 0.0, min_subnorm_value, 0.0);
  
    /* pow (-0, y) == +0 for y > 0 and not an odd integer.  */
    TEST_ff_f (pow, minus_zero, 4, 0.0);
    TEST_ff_f (pow, minus_zero, 0x1p24, 0.0);
    TEST_ff_f (pow, minus_zero, 0x1p127, 0.0);
    TEST_ff_f (pow, minus_zero, max_value, 0.0);
+  TEST_ff_f (pow, minus_zero, min_subnorm_value, 0.0);
  
    TEST_ff_f (pow, 16, 0.25L, 2);
    TEST_ff_f (pow, 0x1p64L, 0.125L, 256);
@@ -8407,6 +8427,15 @@ pow_test (void)
    TEST_ff_f (pow, 0x1.0000000000001p0L, -0x1.23456789abcdfp61L, 1.0118762747828234466621210689458255908670e-253L);
  #endif
  
+  TEST_ff_f (pow, min_subnorm_value, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, min_subnorm_value, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, max_value, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, max_value, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 0.99L, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 0.99L, -min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 1.01L, min_subnorm_value, 1.0L);
+  TEST_ff_f (pow, 1.01L, -min_subnorm_value, 1.0L);
+
    TEST_ff_f (pow, 2.0L, -100000.0L, plus_zero, UNDERFLOW_EXCEPTION);
  
    END (pow);
diff --git a/sysdeps/i386/fpu/e_powl.S b/sysdeps/i386/fpu/e_powl.S

index 933418c..ac4842c 100644 (file)
--- a/sysdeps/i386/fpu/e_powl.S
+++ b/sysdeps/i386/fpu/e_powl.S
@@ -38,6 +38,9 @@ p64:  .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
         .type p78,@object
  p78:   .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44
         ASM_SIZE_DIRECTIVE(p78)
+       .type pm79,@object
+pm79:  .byte 0, 0, 0, 0, 0, 0, 0, 0x3b
+       ASM_SIZE_DIRECTIVE(pm79)
  
         .section .rodata.cst16,"aM",@progbits,16
  
@@ -120,9 +123,25 @@ ENTRY(__ieee754_powl)
         fucomp  %st(1)          // y : x
         fnstsw
         sahf
-       jne     3f
+       je      9f
  
-       /* OK, we have an integer value for y.  */
+       // If y has absolute value at most 0x1p-79, then any finite
+       // nonzero x will result in 1.  Saturate y to those bounds to
+       // avoid underflow in the calculation of y*log2(x).
+       fld     %st             // y : y : x
+       fabs                    // |y| : y : x
+       fcompl  MO(pm79)        // y : x
+       fnstsw
+       sahf
+       jnc     3f
+       fstp    %st(0)          // pop y
+       fldl    MO(pm79)        // 0x1p-79 : x
+       testb   $2, %dl
+       jnz     3f              // y > 0
+       fchs                    // -0x1p-79 : x
+       jmp     3f
+
+9:     /* OK, we have an integer value for y.  */
         popl    %eax
         cfi_adjust_cfa_offset (-4)
         popl    %edx
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c

index 3fd5e65..5131718 100644 (file)
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -90,6 +90,10 @@ __ieee754_pow(double x, double y) {
  
      SET_RESTORE_ROUND (FE_TONEAREST);
  
+    /* Avoid internal underflow for tiny y.  The exact value of y does
+       not matter if |y| <= 2**-64.  */
+    if (ABS (y) < 0x1p-64)
+      y = y < 0 ? -0x1p-64 : 0x1p-64;
      z = log1(x,&aa,&error);                                 /* x^y  =e^(y log (X)) */
      t = y*134217729.0;
      y1 = t - (t-y);
diff --git a/sysdeps/ieee754/flt-32/e_powf.c b/sysdeps/ieee754/flt-32/e_powf.c

index 4306947..12c408f 100644 (file)
--- a/sysdeps/ieee754/flt-32/e_powf.c
+++ b/sysdeps/ieee754/flt-32/e_powf.c
@@ -141,6 +141,10 @@ __ieee754_powf(float x, float y)
             t2 = v-(t1-u);
         } else {
             float s2,s_h,s_l,t_h,t_l;
+           /* Avoid internal underflow for tiny y.  The exact value
+              of y does not matter if |y| <= 2**-32.  */
+           if (iy < 0x2f800000)
+             SET_FLOAT_WORD (y, (hy & 0x80000000) | 0x2f800000);
             n = 0;
         /* take care subnormal number */
             if(ix<0x00800000)
diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S

index 4fe23c0..1b37185 100644 (file)
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@@ -38,6 +38,9 @@ p64:  .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
         .type p78,@object
  p78:   .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44
         ASM_SIZE_DIRECTIVE(p78)
+       .type pm79,@object
+pm79:  .byte 0, 0, 0, 0, 0, 0, 0, 0x3b
+       ASM_SIZE_DIRECTIVE(pm79)
  
         .section .rodata.cst16,"aM",@progbits,16
  
@@ -110,9 +113,25 @@ ENTRY(__ieee754_powl)
         fistpll -8(%rsp)        // y : x
         fildll  -8(%rsp)        // int(y) : y : x
         fucomip %st(1),%st      // y : x
-       jne     3f
+       je      9f
+
+       // If y has absolute value at most 0x1p-79, then any finite
+       // nonzero x will result in 1.  Saturate y to those bounds to
+       // avoid underflow in the calculation of y*log2(x).
+       fldl    MO(pm79)        // 0x1p-79 : y : x
+       fld     %st(1)          // y : 0x1p-79 : y : x
+       fabs                    // |y| : 0x1p-79 : y : x
+       fcomip  %st(1), %st     // 0x1p-79 : y : x
+       fstp    %st(0)          // y : x
+       jnc     3f
+       fstp    %st(0)          // pop y
+       fldl    MO(pm79)        // 0x1p-79 : x
+       testb   $2, %dl
+       jnz     3f              // y > 0
+       fchs                    // -0x1p-79 : x
+       jmp     3f
  
-       /* OK, we have an integer value for y.  */
+9:     /* OK, we have an integer value for y.  */
         mov     -8(%rsp),%eax
         mov     -4(%rsp),%edx
         orl     $0, %edx
author	Joseph Myers <joseph@codesourcery.com>
	Wed, 7 Nov 2012 13:03:31 +0000 (13:03 +0000)
committer	Joseph Myers <joseph@codesourcery.com>
	Wed, 7 Nov 2012 13:03:31 +0000 (13:03 +0000)
ChangeLog		patch \| blob \| history
NEWS		patch \| blob \| history
math/libm-test.inc		patch \| blob \| history
sysdeps/i386/fpu/e_powl.S		patch \| blob \| history
sysdeps/ieee754/dbl-64/e_pow.c		patch \| blob \| history
sysdeps/ieee754/flt-32/e_powf.c		patch \| blob \| history
sysdeps/x86_64/fpu/e_powl.S		patch \| blob \| history