Optimize f_mod
authorUlrich Drepper <drepper@gmail.com>
Mon, 24 Oct 2011 00:55:26 +0000 (20:55 -0400)
committerUlrich Drepper <drepper@gmail.com>
Mon, 24 Oct 2011 00:55:26 +0000 (20:55 -0400)
Branch prediction for the 32-bit implementation and a new optimized
64-bit implementation.

ChangeLog
sysdeps/ieee754/dbl-64/e_fmod.c
sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c [new file with mode: 0644]

index c41dfd8..b867e4f 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2011-10-23  Ulrich Drepper  <drepper@gmail.com>
 
+       * sysdeps/ieee754/dbl-64/e_fmod.c (__ieee754_fmod): Add some branch
+       prediction.
+       * sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c: New file.
+
        * string/strnlen.c: Don't define STRNLEN, reverse logic.
        Remove unused variable magic_bits.
        * sysdeps/i386/i686/multiarch/rtld-strnlen.c: New file.
index a575f61..0328b01 100644 (file)
@@ -1,4 +1,3 @@
-/* @(#)e_fmod.c 5.1 93/09/24 */
 /*
  * ====================================================
  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
@@ -44,7 +43,7 @@ __ieee754_fmod (double x, double y)
        }
 
     /* determine ix = ilogb(x) */
-       if(hx<0x00100000) {     /* subnormal x */
+       if(__builtin_expect(hx<0x00100000, 0)) {        /* subnormal x */
            if(hx==0) {
                for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
            } else {
@@ -53,7 +52,7 @@ __ieee754_fmod (double x, double y)
        } else ix = (hx>>20)-1023;
 
     /* determine iy = ilogb(y) */
-       if(hy<0x00100000) {     /* subnormal y */
+       if(__builtin_expect(hy<0x00100000, 0)) {        /* subnormal y */
            if(hy==0) {
                for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
            } else {
@@ -62,7 +61,7 @@ __ieee754_fmod (double x, double y)
        } else iy = (hy>>20)-1023;
 
     /* set up {hx,lx}, {hy,ly} and align y to x */
-       if(ix >= -1022)
+       if(__builtin_expect(ix >= -1022, 1))
            hx = 0x00100000|(0x000fffff&hx);
        else {          /* subnormal x, shift x to normal */
            n = -1022-ix;
@@ -74,7 +73,7 @@ __ieee754_fmod (double x, double y)
                lx = 0;
            }
        }
-       if(iy >= -1022)
+       if(__builtin_expect(iy >= -1022, 1))
            hy = 0x00100000|(0x000fffff&hy);
        else {          /* subnormal y, shift y to normal */
            n = -1022-iy;
@@ -108,7 +107,7 @@ __ieee754_fmod (double x, double y)
            hx = hx+hx+(lx>>31); lx = lx+lx;
            iy -= 1;
        }
-       if(iy>= -1022) {        /* normalize output */
+       if(__builtin_expect(iy>= -1022, 1)) {   /* normalize output */
            hx = ((hx-0x00100000)|((iy+1023)<<20));
            INSERT_WORDS(x,hx|sx,lx);
        } else {                /* subnormal output */
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c b/sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c
new file mode 100644 (file)
index 0000000..0e20571
--- /dev/null
@@ -0,0 +1,104 @@
+/* Rewritten for 64-bit machines by Ulrich Drepper <drepper@gmail.com>.  */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+/*
+ * __ieee754_fmod(x,y)
+ * Return x mod y in exact arithmetic
+ * Method: shift and subtract
+ */
+
+#include "math.h"
+#include "math_private.h"
+
+static const double one = 1.0, Zero[] = {0.0, -0.0,};
+
+double
+__ieee754_fmod (double x, double y)
+{
+       int32_t n,i,ix,iy;
+       int64_t hx,hy,hz,sx;
+
+       EXTRACT_WORDS64(hx,x);
+       EXTRACT_WORDS64(hy,y);
+       sx = hx&UINT64_C(0x8000000000000000);   /* sign of x */
+       hx ^=sx;                                /* |x| */
+       hy &= UINT64_C(0x7fffffffffffffff);     /* |y| */
+
+    /* purge off exception values */
+       if(__builtin_expect(hy==0
+                           || hx >= UINT64_C(0x7ff0000000000000)
+                           || hy > UINT64_C(0x7ff0000000000000), 0))
+         /* y=0,or x not finite or y is NaN */
+           return (x*y)/(x*y);
+       if(__builtin_expect(hx<=hy, 0)) {
+           if(hx<hy) return x; /* |x|<|y| return x */
+           return Zero[(uint64_t)sx>>63];      /* |x|=|y| return x*0*/
+       }
+
+    /* determine ix = ilogb(x) */
+       if(__builtin_expect(hx<UINT64_C(0x0010000000000000), 0)) {
+         /* subnormal x */
+         for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
+       } else ix = (hx>>52)-1023;
+
+    /* determine iy = ilogb(y) */
+       if(__builtin_expect(hy<UINT64_C(0x0010000000000000), 0)) {      /* subnormal y */
+         for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
+       } else iy = (hy>>52)-1023;
+
+    /* set up hx, hy and align y to x */
+       if(__builtin_expect(ix >= -1022, 1))
+           hx = UINT64_C(0x0010000000000000)|(UINT64_C(0x000fffffffffffff)&hx);
+       else {          /* subnormal x, shift x to normal */
+           n = -1022-ix;
+           hx<<=n;
+       }
+       if(__builtin_expect(iy >= -1022, 1))
+           hy = UINT64_C(0x0010000000000000)|(UINT64_C(0x000fffffffffffff)&hy);
+       else {          /* subnormal y, shift y to normal */
+           n = -1022-iy;
+           hy<<=n;
+       }
+
+    /* fix point fmod */
+       n = ix - iy;
+       while(n--) {
+           hz=hx-hy;
+           if(hz<0){hx = hx+hx;}
+           else {
+               if(hz==0)               /* return sign(x)*0 */
+                   return Zero[(uint64_t)sx>>63];
+               hx = hz+hz;
+           }
+       }
+       hz=hx-hy;
+       if(hz>=0) {hx=hz;}
+
+    /* convert back to floating value and restore the sign */
+       if(hx==0)                       /* return sign(x)*0 */
+           return Zero[(uint64_t)sx>>63];
+       while(hx<UINT64_C(0x0010000000000000)) {        /* normalize x */
+           hx = hx+hx;
+           iy -= 1;
+       }
+       if(__builtin_expect(iy>= -1022, 1)) {   /* normalize output */
+         hx = ((hx-UINT64_C(0x0010000000000000))|((uint64_t)(iy+1023)<<52));
+           INSERT_WORDS64(x,hx|sx);
+       } else {                /* subnormal output */
+           n = -1022 - iy;
+           hx>>=n;
+           INSERT_WORDS64(x,hx|sx);
+           x *= one;           /* create necessary signal */
+       }
+       return x;               /* exact output */
+}
+strong_alias (__ieee754_fmod, __fmod_finite)