From bcf01e6d800e837622ddbc851b42b55fa99e5636 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sat, 15 Oct 2011 20:22:59 -0400 Subject: [PATCH] Optimize exp Add __exp*_finite optimizations and rewrite some wrappers. --- ChangeLog | 15 ++++++ math/Versions | 1 + math/bits/math-finite.h | 7 +++ math/e_expl.c | 1 + sysdeps/i386/fpu/e_exp.S | 17 ++++++- sysdeps/i386/fpu/e_expf.S | 17 ++++++- sysdeps/i386/fpu/e_expl.c | 3 +- sysdeps/ieee754/dbl-64/e_exp.c | 3 +- sysdeps/ieee754/dbl-64/w_exp.c | 79 ++++++++++++++----------------- sysdeps/ieee754/flt-32/e_expf.c | 7 +-- sysdeps/ieee754/flt-32/w_expf.c | 90 +++++++++++++++--------------------- sysdeps/ieee754/ldbl-128/e_expl.c | 5 +- sysdeps/ieee754/ldbl-128ibm/e_expl.c | 3 +- sysdeps/ieee754/ldbl-96/w_expl.c | 89 +++++++++++++++-------------------- 14 files changed, 180 insertions(+), 157 deletions(-) diff --git a/ChangeLog b/ChangeLog index 44580a5..0b276d3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,20 @@ 2011-10-15 Ulrich Drepper + * math/Versions [libm] (GLIBC_2.15): Add __exp_finite, __expf_finite, + __expl_finite. + * math/bits/math-finite.h: Add entries for exp. + * math/e_expl.c: Add __*_finite alias. + * sysdeps/i386/fpu/e_exp.S: Likewise. + * sysdeps/i386/fpu/e_expf.S: Likewise. + * sysdeps/i386/fpu/e_expl.c: Likewise. + * sysdeps/ieee754/dbl-64/e_exp.c: Likewise. + * sysdeps/ieee754/flt-32/e_expf.c: Likewise. + * sysdeps/ieee754/ldbl-128/e_expl.c: Likewise. + * sysdeps/ieee754/ldbl-128ibm/e_expl.c: Likewise. + * sysdeps/ieee754/dbl-64/w_exp.c: Complete rewrite. + * sysdeps/ieee754/flt-32/w_expf.c: Likewise. + * sysdeps/ieee754/ldbl-96/w_expl.c: Likewise. + * sysdeps/i386/i686/fpu/e_logf.S: No need for the fyl2xp1 use, fyl2x is sufficient, at least on modern CPUs. 
diff --git a/math/Versions b/math/Versions index 66bf460..0988851 100644 --- a/math/Versions +++ b/math/Versions @@ -196,5 +196,6 @@ libm { __sinh_finite; __sinhf_finite; __sinhl_finite; __sqrt_finite; __sqrtf_finite; __sqrtl_finite; __gamma_r_finite; __gammaf_r_finite; __gammal_r_finite; + __exp_finite; __expf_finite; __expl_finite; } } diff --git a/math/bits/math-finite.h b/math/bits/math-finite.h index c6b9527..7443d26 100644 --- a/math/bits/math-finite.h +++ b/math/bits/math-finite.h @@ -67,6 +67,13 @@ extern float coshf (float) __asm__ ("__coshf_finite"); extern long double coshl (long double) __asm__ ("__coshl_finite"); #endif +/* exp. */ +extern double exp (double) __asm__ ("__exp_finite"); +extern float expf (float) __asm__ ("__expf_finite"); +#ifdef __MATH_DECLARE_LDOUBLE +extern long double expl (long double) __asm__ ("__expl_finite"); +#endif + #ifdef __USE_GNU /* exp10. */ extern double exp10 (double) __asm__ ("__exp10_finite"); diff --git a/math/e_expl.c b/math/e_expl.c index f9467c3..5ba6eb5 100644 --- a/math/e_expl.c +++ b/math/e_expl.c @@ -9,6 +9,7 @@ __ieee754_expl (long double x) __set_errno (ENOSYS); return 0.0; } +strong_alias (__ieee754_expl, __expl_finite) stub_warning (expl) #include diff --git a/sysdeps/i386/fpu/e_exp.S b/sysdeps/i386/fpu/e_exp.S index 4a75fa1..2c331d9 100644 --- a/sysdeps/i386/fpu/e_exp.S +++ b/sysdeps/i386/fpu/e_exp.S @@ -5,7 +5,6 @@ #include -RCSID("$NetBSD: e_exp.S,v 1.7 1996/07/03 17:31:28 jtc Exp $") /* e^x = 2^(x * log2(e)) */ ENTRY(__ieee754_exp) @@ -39,3 +38,19 @@ ENTRY(__ieee754_exp) fldz /* Set result to 0. 
*/ 2: ret END (__ieee754_exp) + + +ENTRY(__exp_finite) + fldl2e + fmull 4(%esp) /* x * log2(e) */ + fld %st + frndint /* int(x * log2(e)) */ + fsubr %st,%st(1) /* fract(x * log2(e)) */ + fxch + f2xm1 /* 2^(fract(x * log2(e))) - 1 */ + fld1 + faddp /* 2^(fract(x * log2(e))) */ + fscale /* e^x */ + fstp %st(1) + ret +END(__exp_finite) diff --git a/sysdeps/i386/fpu/e_expf.S b/sysdeps/i386/fpu/e_expf.S index 5fd49b8..4e4f6a0 100644 --- a/sysdeps/i386/fpu/e_expf.S +++ b/sysdeps/i386/fpu/e_expf.S @@ -6,7 +6,6 @@ #include -RCSID("$NetBSD: $") /* e^x = 2^(x * log2(e)) */ ENTRY(__ieee754_expf) @@ -40,3 +39,19 @@ ENTRY(__ieee754_expf) fldz /* Set result to 0. */ 2: ret END (__ieee754_expf) + + +ENTRY(__expf_finite) + fldl2e + fmuls 4(%esp) /* x * log2(e) */ + fld %st + frndint /* int(x * log2(e)) */ + fsubr %st,%st(1) /* fract(x * log2(e)) */ + fxch + f2xm1 /* 2^(fract(x * log2(e))) - 1 */ + fld1 + faddp /* 2^(fract(x * log2(e))) */ + fscale /* e^x */ + fstp %st(1) + ret +END(__expf_finite) diff --git a/sysdeps/i386/fpu/e_expl.c b/sysdeps/i386/fpu/e_expl.c index 2240cea..8dc9581 100644 --- a/sysdeps/i386/fpu/e_expl.c +++ b/sysdeps/i386/fpu/e_expl.c @@ -63,7 +63,7 @@ __ieee754_expl (long double x) "fld1\n\t" /* 4 1.0 */ "faddp\n\t" /* 3 2^(fract(x * log2(e))) */ "fstp %%st(1)\n\t" /* 2 */ - "fscale\n\t" /* 2 scale factor is st(1); e^x */ + "fscale\n\t" /* 2 scale factor is st(1); e^x */ "fstp %%st(1)\n\t" /* 1 */ "fstp %%st(1)\n\t" /* 0 */ "jmp 2f\n\t" @@ -75,3 +75,4 @@ __ieee754_expl (long double x) : "=t" (res) : "0" (x), "m" (c0), "m" (c1) : "ax", "dx"); return res; } +strong_alias (__ieee754_expl, __expl_finite) diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c index 717469e..f4b34a6 100644 --- a/sysdeps/ieee754/dbl-64/e_exp.c +++ b/sysdeps/ieee754/dbl-64/e_exp.c @@ -1,7 +1,7 @@ /* * IBM Accurate Mathematical Library * written by International Business Machines Corp. 
- * Copyright (C) 2001 Free Software Foundation + * Copyright (C) 2001, 2011 Free Software Foundation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by @@ -145,6 +145,7 @@ double __ieee754_exp(double x) { else return __slowexp(x); } } +strong_alias (__ieee754_exp, __exp_finite) /************************************************************************/ /* Compute e^(x+xx)(Double-Length number) .The routine also receive */ diff --git a/sysdeps/ieee754/dbl-64/w_exp.c b/sysdeps/ieee754/dbl-64/w_exp.c index 1216492..f1becff 100644 --- a/sysdeps/ieee754/dbl-64/w_exp.c +++ b/sysdeps/ieee754/dbl-64/w_exp.c @@ -1,55 +1,46 @@ -/* @(#)w_exp.c 5.1 93/09/24 */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: w_exp.c,v 1.6 1995/05/10 20:48:51 jtc Exp $"; -#endif +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -/* - * wrapper exp(x) - */ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
-#include "math.h"
-#include "math_private.h"
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <math.h>
+#include <math_private.h>
 
-#ifdef __STDC__
 static const double
-#else
-static double
-#endif
 o_threshold=  7.09782712893383973096e+02,  /* 0x40862E42, 0xFEFA39EF */
 u_threshold= -7.45133219101941108420e+02;  /* 0xc0874910, 0xD52D3051 */
 
-#ifdef __STDC__
-	double __exp(double x)		/* wrapper exp */
-#else
-	double __exp(x)			/* wrapper exp */
-	double x;
-#endif
+
+/* Wrapper for exp: range errors are reported for finite X only.  */
+double
+__exp (double x)
 {
-#ifdef _IEEE_LIBM
-	return __ieee754_exp(x);
-#else
-	double z;
-	z = __ieee754_exp(x);
-	if(_LIB_VERSION == _IEEE_) return z;
-	if(__finite(x)) {
-	    if(x>o_threshold)
-	        return __kernel_standard(x,x,6); /* exp overflow */
-	    else if(x<u_threshold)
-	        return __kernel_standard(x,x,7); /* exp underflow */
-	}
-	return z;
-#endif
+  if (__builtin_expect (x > o_threshold, 0))	/* Overflow range.  */
+    {
+      if (_LIB_VERSION != _IEEE_ && __finite (x))
+	return __kernel_standard (x, x, 6);
+    }
+  else if (__builtin_expect (x < u_threshold, 0))	/* Underflow range.  */
+    {
+      if (_LIB_VERSION != _IEEE_ && __finite (x))
+	return __kernel_standard (x, x, 7);
+    }
+
+  return __ieee754_exp (x);
 }
 hidden_def (__exp)
 weak_alias (__exp, exp)
diff --git a/sysdeps/ieee754/flt-32/e_expf.c b/sysdeps/ieee754/flt-32/e_expf.c
index b9cd53c..872d34b 100644
--- a/sysdeps/ieee754/flt-32/e_expf.c
+++ b/sysdeps/ieee754/flt-32/e_expf.c
@@ -1,5 +1,5 @@
 /* Single-precision floating point e^x.
-   Copyright (C) 1997, 1998, 2005, 2006 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1998, 2005, 2006, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
Contributed by Geoffrey Keating @@ -33,8 +33,8 @@ Then e^x is approximated as e^x = 2^n ( e^(t/512 + delta[t]) - + ( e^(t/512 + delta[t]) - * ( p(x + delta[t] + n * ln(2)) - delta ) ) ) + + ( e^(t/512 + delta[t]) + * ( p(x + delta[t] + n * ln(2)) - delta ) ) ) where - p(x) is a polynomial approximating e(x)-1; @@ -138,3 +138,4 @@ __ieee754_expf (float x) /* Return x, if x is a NaN or Inf; or overflow, otherwise. */ return TWO127*x; } +strong_alias (__ieee754_expf, __expf_finite) diff --git a/sysdeps/ieee754/flt-32/w_expf.c b/sysdeps/ieee754/flt-32/w_expf.c index 83b268f..151c584 100644 --- a/sysdeps/ieee754/flt-32/w_expf.c +++ b/sysdeps/ieee754/flt-32/w_expf.c @@ -1,60 +1,46 @@ -/* w_expf.c -- float version of w_exp.c. - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: w_expf.c,v 1.3 1995/05/10 20:48:53 jtc Exp $"; -#endif - -/* - * wrapper expf(x) - */ - -#include "math.h" -#include "math_private.h" - -#ifdef __STDC__ +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <math.h>
+#include <math_private.h>
+
 static const float
-#else
-static float
-#endif
 o_threshold=  8.8722831726e+01,  /* 0x42b17217 */
 u_threshold= -1.0397208405e+02;  /* 0xc2cff1b5 */
 
-#ifdef __STDC__
-	float __expf(float x)		/* wrapper expf */
-#else
-	float __expf(x)			/* wrapper expf */
-	float x;
-#endif
+
+/* Wrapper for expf: range errors are reported for finite X only.  */
+float
+__expf (float x)
 {
-#ifdef _IEEE_LIBM
-	return __ieee754_expf(x);
-#else
-	float z;
-	z = __ieee754_expf(x);
-	if(_LIB_VERSION == _IEEE_) return z;
-	if(__finitef(x)) {
-	    if(x>o_threshold)
-	        /* exp overflow */
-	        return (float)__kernel_standard((double)x,(double)x,106);
-	    else if(x<u_threshold)
-	        /* exp underflow */
-	        return (float)__kernel_standard((double)x,(double)x,107);
-	}
-	return z;
-#endif
+  if (__builtin_expect (x > o_threshold, 0))	/* Overflow range.  */
+    {
+      if (_LIB_VERSION != _IEEE_ && __finitef (x))
+	return __kernel_standard_f (x, x, 106);
+    }
+  else if (__builtin_expect (x < u_threshold, 0))	/* Underflow range.  */
+    {
+      if (_LIB_VERSION != _IEEE_ && __finitef (x))
+	return __kernel_standard_f (x, x, 107);
+    }
+
+  return __ieee754_expf (x);
 }
 hidden_def (__expf)
 weak_alias (__expf, expf)
diff --git a/sysdeps/ieee754/ldbl-128/e_expl.c b/sysdeps/ieee754/ldbl-128/e_expl.c
index 31ff16f..0279e77 100644
--- a/sysdeps/ieee754/ldbl-128/e_expl.c
+++ b/sysdeps/ieee754/ldbl-128/e_expl.c
@@ -1,5 +1,5 @@
 /* Quad-precision floating point e^x.
-   Copyright (C) 1999 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Jakub Jelinek
    Partly based on double-precision code
@@ -73,7 +73,7 @@ static const long double C[] = {
 /* Smallest integer x for which e^x overflows.
*/ #define himark C[0] 11356.523406294143949491931077970765L, - + /* Largest integer x for which e^x underflows. */ #define lomark C[1] -11433.4627433362978788372438434526231L, @@ -247,3 +247,4 @@ __ieee754_expl (long double x) /* Return x, if x is a NaN or Inf; or overflow, otherwise. */ return TWO16383*x; } +strong_alias (__ieee754_expl, __expl_finite) diff --git a/sysdeps/ieee754/ldbl-128ibm/e_expl.c b/sysdeps/ieee754/ldbl-128ibm/e_expl.c index daf2cba..9e03eae 100644 --- a/sysdeps/ieee754/ldbl-128ibm/e_expl.c +++ b/sysdeps/ieee754/ldbl-128ibm/e_expl.c @@ -1,5 +1,5 @@ /* Quad-precision floating point e^x. - Copyright (C) 1999,2004,2006, 2008 Free Software Foundation, Inc. + Copyright (C) 1999,2004,2006, 2008, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek Partly based on double-precision code @@ -255,3 +255,4 @@ __ieee754_expl (long double x) /* Return x, if x is a NaN or Inf; or overflow, otherwise. */ return TWO1023*x; } +strong_alias (__ieee754_expl, __expl_finite) diff --git a/sysdeps/ieee754/ldbl-96/w_expl.c b/sysdeps/ieee754/ldbl-96/w_expl.c index 53bb143..703a0a2 100644 --- a/sysdeps/ieee754/ldbl-96/w_expl.c +++ b/sysdeps/ieee754/ldbl-96/w_expl.c @@ -1,61 +1,48 @@ -/* w_expl.c -- long double version of w_exp.c. - * Conversion to long double by Ulrich Drepper, - * Cygnus Support, drepper@cygnus.com. - */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. 
- * ==================================================== - */ - -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: $"; -#endif - -/* - * wrapper expl(x) - */ - -#include "math.h" -#include "math_private.h" - -#ifdef __STDC__ +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/
+
+#include <math.h>
+#include <math_private.h>
+
 static const long double
-#else
-static long double
-#endif
 o_threshold=  1.135652340629414394949193107797076489134e4,
   /* 0x400C, 0xB17217F7, 0xD1CF79AC */
 u_threshold= -1.140019167866942050398521670162263001513e4;
   /* 0x400C, 0xB220C447, 0x69C201E8 */
 
-#ifdef __STDC__
-	long double __expl(long double x)	/* wrapper exp */
-#else
-	long double __expl(x)			/* wrapper exp */
-	long double x;
-#endif
+
+/* Wrapper for expl: range errors are reported for finite X only.  */
+long double
+__expl (long double x)
 {
-#ifdef _IEEE_LIBM
-	return __ieee754_expl(x);
-#else
-	long double z;
-	z = __ieee754_expl(x);
-	if(_LIB_VERSION == _IEEE_) return z;
-	if(__finitel(x)) {
-	    if(x>o_threshold)
-	        return __kernel_standard(x,x,206); /* exp overflow */
-	    else if(x<u_threshold)
-	        return __kernel_standard(x,x,207); /* exp underflow */
-	}
-	return z;
-#endif
+  if (__builtin_expect (x > o_threshold, 0))	/* Overflow range.  */
+    {
+      if (_LIB_VERSION != _IEEE_ && __finitel (x))
+	return __kernel_standard (x, x, 206);
+    }
+  else if (__builtin_expect (x < u_threshold, 0))	/* Underflow range.  */
+    {
+      if (_LIB_VERSION != _IEEE_ && __finitel (x))
+	return __kernel_standard (x, x, 207);
+    }
+
+  return __ieee754_expl (x);
 }
 hidden_def (__expl)
 weak_alias (__expl, expl)
-- 
2.7.4