From: Ulrich Drepper
Date: Sat, 15 Oct 2011 03:41:47 +0000 (-0400)
Subject: Optimize x86-32 log
X-Git-Tag: glibc-2.15~240
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=38ad40ceca8ba35761e79cfce4aaef0d0f7583e6;p=platform%2Fupstream%2Fglibc.git

Optimize x86-32 log
---

diff --git a/ChangeLog b/ChangeLog
index 499050b..e2d6f38 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
 2011-10-14  Ulrich Drepper
+	* sysdeps/i386/fpu/e_log.S: Add real definition of __log_finite.
+	* sysdeps/i386/fpu/e_logf.S: Add real definition of __logf_finite.
+	* sysdeps/i386/fpu/e_logl.S: Add real definition of __logl_finite.
+	* sysdeps/i386/i686/fpu/e_log.S: New file.
+	* sysdeps/i386/i686/fpu/e_logf.S: New file.
+	* sysdeps/i386/i686/fpu/e_logl.S: New file.
+
+	* ctype/ctype.h: Add support for inlined isXXX functions when compiling C++ code.
diff --git a/sysdeps/i386/fpu/e_log.S b/sysdeps/i386/fpu/e_log.S
index 8110a84..a2e4d89 100644
--- a/sysdeps/i386/fpu/e_log.S
+++ b/sysdeps/i386/fpu/e_log.S
@@ -62,4 +62,22 @@ ENTRY(__ieee754_log)
 	fstp	%st(1)
 	ret
 END (__ieee754_log)
-strong_alias (__ieee754_log, __log_finite)
+
+ENTRY(__log_finite)		// log(x) for the double at 4(%esp)
+	fldln2			// log(2)
+	fldl	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fcompl	MO(limit)	// x-1 : x : log(2)
+	fnstsw			// x-1 : x : log(2)
+	andb	$0x45, %ah	// keep C3, C2, C0 of the compare
+	jz	2b		// all clear: |x-1| > limit; label 2 lies above this hunk
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__log_finite)
diff --git a/sysdeps/i386/fpu/e_logf.S b/sysdeps/i386/fpu/e_logf.S
index b683e13..1992cc2 100644
--- a/sysdeps/i386/fpu/e_logf.S
+++ b/sysdeps/i386/fpu/e_logf.S
@@ -63,4 +63,22 @@ ENTRY(__ieee754_logf)
 	fstp	%st(1)
 	ret
 END (__ieee754_logf)
-strong_alias (__ieee754_logf, __logf_finite)
+
+ENTRY(__logf_finite)		// log(x) for the float at 4(%esp)
+	fldln2			// log(2)
+	flds	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fcompl	MO(limit)	// x-1 : x : log(2)
+	fnstsw			// x-1 : x : log(2)
+	andb	$0x45, %ah	// keep C3, C2, C0 of the compare
+	jz	2b		// all clear: |x-1| > limit; label 2 lies above this hunk
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__logf_finite)
diff --git a/sysdeps/i386/fpu/e_logl.S b/sysdeps/i386/fpu/e_logl.S
index ee1fb16..bfb72a3 100644
--- a/sysdeps/i386/fpu/e_logl.S
+++ b/sysdeps/i386/fpu/e_logl.S
@@ -63,4 +63,22 @@ ENTRY(__ieee754_logl)
 	fstp	%st(1)
 	ret
 END (__ieee754_logl)
-strong_alias (__ieee754_logl, __logl_finite)
+
+ENTRY(__logl_finite)		// log(x) for the long double at 4(%esp)
+	fldln2			// log(2)
+	fldt	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fcompl	MO(limit)	// x-1 : x : log(2)
+	fnstsw			// x-1 : x : log(2)
+	andb	$0x45, %ah	// keep C3, C2, C0 of the compare
+	jz	2b		// all clear: |x-1| > limit; label 2 lies above this hunk
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__logl_finite)
diff --git a/sysdeps/i386/i686/fpu/e_log.S b/sysdeps/i386/i686/fpu/e_log.S
new file mode 100644
index 0000000..c6524b1
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/e_log.S
@@ -0,0 +1,80 @@
+/*
+ * Written by J.T. Conklin.
+ * Public domain.
+ *
+ * Changed to use fyl2xp1 for values near 1.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>	/* NOTE(review): header name restored, confirm */
+
+#ifdef __ELF__
+	.section .rodata.cst8,"aM",@progbits,8
+#else
+	.text
+#endif
+	.p2align 3
+	ASM_TYPE_DIRECTIVE(one,@object)
+one:	.double 1.0
+	ASM_SIZE_DIRECTIVE(one)
+	/* It is not important that this constant is precise.  It is only
+	   a value which is known to be on the safe side for using the
+	   fyl2xp1 instruction.  */
+	ASM_TYPE_DIRECTIVE(limit,@object)
+limit:	.double 0.29		// 64-bit constant: load it with fldl
+	ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+	.text
+ENTRY(__ieee754_log)		// log(x) for the double at 4(%esp)
+	fldln2			// log(2)
+	fldl	4(%esp)		// x : log(2)
+	fucomi	%st		// x unordered with itself <=> x is NaN
+	jp	3f		// NaN: return it unchanged
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2f		// 0.29 < |x-1|: too far from 1 for fyl2xp1
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+
+2:	fstp	%st(0)		// x : log(2)
+	fyl2x			// log(x)
+	ret
+
+3:	fstp	%st(1)		// x (NaN); log(2) dropped
+	ret
+END (__ieee754_log)
+
+ENTRY(__log_finite)		// as __ieee754_log, minus the NaN check
+	fldln2			// log(2)
+	fldl	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2b		// 0.29 < |x-1|: share the fyl2x tail of __ieee754_log
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__log_finite)
diff --git a/sysdeps/i386/i686/fpu/e_logf.S b/sysdeps/i386/i686/fpu/e_logf.S
new file mode 100644
index 0000000..64f8807
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/e_logf.S
@@ -0,0 +1,81 @@
+/*
+ * Written by J.T. Conklin.
+ * Public domain.
+ * Adapted for float by Ulrich Drepper.
+ *
+ * Changed to use fyl2xp1 for values near 1.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>	/* NOTE(review): header name restored, confirm */
+
+#ifdef __ELF__
+	.section .rodata.cst8,"aM",@progbits,8
+#else
+	.text
+#endif
+	.p2align 3
+	ASM_TYPE_DIRECTIVE(one,@object)
+one:	.double 1.0
+	ASM_SIZE_DIRECTIVE(one)
+	/* It is not important that this constant is precise.  It is only
+	   a value which is known to be on the safe side for using the
+	   fyl2xp1 instruction.  */
+	ASM_TYPE_DIRECTIVE(limit,@object)
+limit:	.double 0.29		// 64-bit constant: load it with fldl
+	ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+	.text
+ENTRY(__ieee754_logf)		// log(x) for the float at 4(%esp)
+	fldln2			// log(2)
+	flds	4(%esp)		// x : log(2)
+	fucomi	%st		// x unordered with itself <=> x is NaN
+	jp	3f		// NaN: return it unchanged
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2f		// 0.29 < |x-1|: too far from 1 for fyl2xp1
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+
+2:	fstp	%st(0)		// x : log(2)
+	fyl2x			// log(x)
+	ret
+
+3:	fstp	%st(1)		// x (NaN); log(2) dropped
+	ret
+END (__ieee754_logf)
+
+ENTRY(__logf_finite)		// as __ieee754_logf, minus the NaN check
+	fldln2			// log(2)
+	flds	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2b		// 0.29 < |x-1|: share the fyl2x tail of __ieee754_logf
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__logf_finite)
diff --git a/sysdeps/i386/i686/fpu/e_logl.S b/sysdeps/i386/i686/fpu/e_logl.S
new file mode 100644
index 0000000..4e79a5a
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/e_logl.S
@@ -0,0 +1,81 @@
+/*
+ * Written by J.T. Conklin.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper.
+ * Changed to use fyl2xp1 for values near 1.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>	/* NOTE(review): header name restored, confirm */
+
+#ifdef __ELF__
+	.section .rodata.cst8,"aM",@progbits,8
+#else
+	.text
+#endif
+	.p2align 3
+	ASM_TYPE_DIRECTIVE(one,@object)
+one:	.double 1.0
+	ASM_SIZE_DIRECTIVE(one)
+	/* It is not important that this constant is precise.  It is only
+	   a value which is known to be on the safe side for using the
+	   fyl2xp1 instruction.  */
+	ASM_TYPE_DIRECTIVE(limit,@object)
+limit:	.double 0.29		// 64-bit constant: load it with fldl
+	ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+	.text
+ENTRY(__ieee754_logl)		// log(x) for the long double at 4(%esp)
+	fldln2			// log(2)
+	fldt	4(%esp)		// x : log(2)
+	fucomi	%st		// x unordered with itself <=> x is NaN
+	jp	3f		// NaN: return it unchanged
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2f		// 0.29 < |x-1|: too far from 1 for fyl2xp1
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+
+2:	fstp	%st(0)		// x : log(2)
+	fyl2x			// log(x)
+	ret
+
+3:	fstp	%st(1)		// x (NaN); log(2) dropped
+	ret
+END (__ieee754_logl)
+
+ENTRY(__logl_finite)		// as __ieee754_logl, minus the NaN check
+	fldln2			// log(2)
+	fldt	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2)
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2)
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2b		// 0.29 < |x-1|: share the fyl2x tail of __ieee754_logl
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__logl_finite)