From ad0f5cad15f1c76faf3843b3e189dead2c05cfcc Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 16 Oct 2011 20:58:17 -0400 Subject: [PATCH] Use rounds{s,d} for x86 rint, ceil, floor --- ChangeLog | 29 +++++++++ elf/do-rel.h | 73 +++++++++++++++++++++-- sysdeps/ieee754/dbl-64/s_ceil.c | 20 ++----- sysdeps/ieee754/dbl-64/s_floor.c | 14 ++--- sysdeps/ieee754/dbl-64/s_rint.c | 26 +++----- sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c | 16 ++--- sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c | 4 +- sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c | 17 ++---- sysdeps/ieee754/flt-32/s_ceilf.c | 27 +++------ sysdeps/ieee754/flt-32/s_floorf.c | 26 +++----- sysdeps/ieee754/flt-32/s_rintf.c | 28 +++------ sysdeps/x86_64/dl-machine.h | 4 ++ sysdeps/x86_64/fpu/bits/mathinline.h | 89 ++++++++++++++++++++++++---- sysdeps/x86_64/fpu/multiarch/Makefile | 4 ++ sysdeps/x86_64/fpu/multiarch/s_ceil-c.c | 2 + sysdeps/x86_64/fpu/multiarch/s_ceil.S | 40 +++++++++++++ sysdeps/x86_64/fpu/multiarch/s_ceilf-c.c | 2 + sysdeps/x86_64/fpu/multiarch/s_ceilf.S | 40 +++++++++++++ sysdeps/x86_64/fpu/multiarch/s_floor-c.c | 2 + sysdeps/x86_64/fpu/multiarch/s_floor.S | 40 +++++++++++++ sysdeps/x86_64/fpu/multiarch/s_floorf-c.c | 2 + sysdeps/x86_64/fpu/multiarch/s_floorf.S | 40 +++++++++++++ sysdeps/x86_64/fpu/multiarch/s_rint-c.c | 2 + sysdeps/x86_64/fpu/multiarch/s_rint.S | 40 +++++++++++++ sysdeps/x86_64/fpu/multiarch/s_rintf-c.c | 2 + sysdeps/x86_64/fpu/multiarch/s_rintf.S | 40 +++++++++++++ 26 files changed, 495 insertions(+), 134 deletions(-) create mode 100644 sysdeps/x86_64/fpu/multiarch/Makefile create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceil-c.c create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceil.S create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceilf-c.c create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceilf.S create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floor-c.c create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floor.S create mode 100644 
sysdeps/x86_64/fpu/multiarch/s_floorf-c.c create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floorf.S create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rint-c.c create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rint.S create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rintf-c.c create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rintf.S diff --git a/ChangeLog b/ChangeLog index 9a792f7..c71959c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,34 @@ 2011-10-16 Ulrich Drepper + * sysdeps/ieee754/dbl-64/s_ceil.c: Avoid alias renamed. + * sysdeps/ieee754/dbl-64/s_floor.c: Likewise. + * sysdeps/ieee754/dbl-64/s_rint.c: Likewise. + * sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c: Likewise. + * sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c: Likewise. + * sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c: Likewise. + * sysdeps/ieee754/flt-32/s_ceilf.c: Likewise. + * sysdeps/ieee754/flt-32/s_floorf.c: Likewise. + * sysdeps/ieee754/flt-32/s_rintf.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/Makefile: New file. + * sysdeps/x86_64/fpu/multiarch/s_ceil-c.c: New file. + * sysdeps/x86_64/fpu/multiarch/s_ceil.S: New file. + * sysdeps/x86_64/fpu/multiarch/s_ceilf-c.c: New file. + * sysdeps/x86_64/fpu/multiarch/s_ceilf.S: New file. + * sysdeps/x86_64/fpu/multiarch/s_floor-c.c: New file. + * sysdeps/x86_64/fpu/multiarch/s_floor.S: New file. + * sysdeps/x86_64/fpu/multiarch/s_floorf-c.c: New file. + * sysdeps/x86_64/fpu/multiarch/s_floorf.S: New file. + * sysdeps/x86_64/fpu/multiarch/s_rint-c.c: New file. + * sysdeps/x86_64/fpu/multiarch/s_rint.S: New file. + * sysdeps/x86_64/fpu/multiarch/s_rintf-c.c: New file. + * sysdeps/x86_64/fpu/multiarch/s_rintf.S: New file. + + * sysdeps/x86_64/fpu/bits/mathinline.h: Add inlines for rint, rintf, + ceil, ceilf, floor, floorf. + + * elf/do-rel.h (elf_dynamic_do_Rel): Work around linker problem. + Perform IRELATIVE relocations last. + * elf/do-rel.h: Add another parameter nrelative, replacing the local variable with the same name. 
Change name of the function to end in Rel or Rela (uppercase). diff --git a/elf/do-rel.h b/elf/do-rel.h index 05c03f7..3f8e7eb 100644 --- a/elf/do-rel.h +++ b/elf/do-rel.h @@ -55,6 +55,10 @@ elf_dynamic_do_Rel (struct link_map *map, const ElfW(Rel) *r = (const void *) reladdr; const ElfW(Rel) *end = (const void *) (reladdr + relsize); ElfW(Addr) l_addr = map->l_addr; +# if defined ELF_MACHINE_IRELATIVE && !defined RTLD_BOOTSTRAP + const ElfW(Rel) *r2 = NULL; + const ElfW(Rel) *end2 = NULL; +# endif #if (!defined DO_RELA || !defined ELF_MACHINE_PLT_REL) && !defined RTLD_BOOTSTRAP /* We never bind lazily during ld.so bootstrap. Unfortunately gcc is @@ -64,7 +68,23 @@ elf_dynamic_do_Rel (struct link_map *map, { /* Doing lazy PLT relocations; they need very little info. */ for (; r < end; ++r) - elf_machine_lazy_rel (map, l_addr, r, skip_ifunc); +# ifdef ELF_MACHINE_IRELATIVE + if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE) + { + if (r2 == NULL) + r2 = r; + end2 = r; + } + else +# endif + elf_machine_lazy_rel (map, l_addr, r, skip_ifunc); + +# ifdef ELF_MACHINE_IRELATIVE + if (r2 != NULL) + for (; r2 <= end2; ++r2) + if (ELFW(R_TYPE) (r2->r_info) == ELF_MACHINE_IRELATIVE) + elf_machine_lazy_rel (map, l_addr, r2, skip_ifunc); +# endif } else #endif @@ -112,17 +132,62 @@ elf_dynamic_do_Rel (struct link_map *map, for (; r < end; ++r) { +#if defined ELF_MACHINE_IRELATIVE && !defined RTLD_BOOTSTRAP + if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE) + { + if (r2 == NULL) + r2 = r; + end2 = r; + continue; + } +#endif + ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff; elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], &map->l_versions[ndx], (void *) (l_addr + r->r_offset), skip_ifunc); } + +#if defined ELF_MACHINE_IRELATIVE && !defined RTLD_BOOTSTRAP + if (r2 != NULL) + for (; r2 <= end2; ++r2) + if (ELFW(R_TYPE) (r2->r_info) == ELF_MACHINE_IRELATIVE) + { + ElfW(Half) ndx + = version[ELFW(R_SYM) (r2->r_info)] & 0x7fff; + elf_machine_rel (map, 
r2, + &symtab[ELFW(R_SYM) (r2->r_info)], + &map->l_versions[ndx], + (void *) (l_addr + r2->r_offset), + skip_ifunc); + } +#endif } #ifndef RTLD_BOOTSTRAP else - for (; r < end; ++r) - elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, - (void *) (l_addr + r->r_offset), skip_ifunc); + { + for (; r < end; ++r) +# ifdef ELF_MACHINE_IRELATIVE + if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE) + { + if (r2 == NULL) + r2 = r; + end2 = r; + } + else +# endif + elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, + (void *) (l_addr + r->r_offset), skip_ifunc); + +# ifdef ELF_MACHINE_IRELATIVE + if (r2 != NULL) + for (; r2 <= end2; ++r2) + if (ELFW(R_TYPE) (r2->r_info) == ELF_MACHINE_IRELATIVE) + elf_machine_rel (map, r2, &symtab[ELFW(R_SYM) (r2->r_info)], + NULL, (void *) (l_addr + r2->r_offset), + skip_ifunc); +# endif + } #endif } } diff --git a/sysdeps/ieee754/dbl-64/s_ceil.c b/sysdeps/ieee754/dbl-64/s_ceil.c index 1b352a6..695cae5 100644 --- a/sysdeps/ieee754/dbl-64/s_ceil.c +++ b/sysdeps/ieee754/dbl-64/s_ceil.c @@ -10,10 +10,6 @@ * ==================================================== */ -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: s_ceil.c,v 1.8 1995/05/10 20:46:53 jtc Exp $"; -#endif - /* * ceil(x) * Return x rounded toward -inf to integral value @@ -26,18 +22,10 @@ static char rcsid[] = "$NetBSD: s_ceil.c,v 1.8 1995/05/10 20:46:53 jtc Exp $"; #include "math.h" #include "math_private.h" -#ifdef __STDC__ static const double huge = 1.0e300; -#else -static double huge = 1.0e300; -#endif -#ifdef __STDC__ - double __ceil(double x) -#else - double __ceil(x) - double x; -#endif +double +__ceil(double x) { int32_t i0,i1,j0; u_int32_t i,j; @@ -78,8 +66,10 @@ static double huge = 1.0e300; INSERT_WORDS(x,i0,i1); return x; } +#ifndef __ceil weak_alias (__ceil, ceil) -#ifdef NO_LONG_DOUBLE +# ifdef NO_LONG_DOUBLE strong_alias (__ceil, __ceill) weak_alias (__ceil, ceill) +# endif #endif diff --git 
a/sysdeps/ieee754/dbl-64/s_floor.c b/sysdeps/ieee754/dbl-64/s_floor.c index 77db9ef..5b593ca 100644 --- a/sysdeps/ieee754/dbl-64/s_floor.c +++ b/sysdeps/ieee754/dbl-64/s_floor.c @@ -10,10 +10,6 @@ * ==================================================== */ -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: s_floor.c,v 1.8 1995/05/10 20:47:20 jtc Exp $"; -#endif - /* * floor(x) * Return x rounded toward -inf to integral value @@ -44,7 +40,7 @@ static double huge = 1.0e300; EXTRACT_WORDS(i0,i1,x); j0 = ((i0>>20)&0x7ff)-0x3ff; if(j0<20) { - if(j0<0) { /* raise inexact if x != 0 */ + if(j0<0) { /* raise inexact if x != 0 */ if(huge+x>0.0) {/* return 0*sign(x) if |x|<1 */ if(i0>=0) {i0=i1=0;} else if(((i0&0x7fffffff)|i1)!=0) @@ -64,12 +60,12 @@ static double huge = 1.0e300; } else { i = ((u_int32_t)(0xffffffff))>>(j0-20); if((i1&i)==0) return x; /* x is integral */ - if(huge+x>0.0) { /* raise inexact flag */ + if(huge+x>0.0) { /* raise inexact flag */ if(i0<0) { if(j0==20) i0+=1; else { j = i1+(1<<(52-j0)); - if(j>12)&0x80000; SET_HIGH_WORD(x,i0); - w = TWO52[sx]+x; - t = w-TWO52[sx]; + w = TWO52[sx]+x; + t = w-TWO52[sx]; GET_HIGH_WORD(i0,t); SET_HIGH_WORD(t,(i0&0x7fffffff)|(sx<<31)); - return t; + return t; } else { i = (0x000fffff)>>j0; if(((i0&i)|i1)==0) return x; /* x is integral */ @@ -91,8 +79,10 @@ TWO52[2]={ w = TWO52[sx]+x; return w-TWO52[sx]; } +#ifndef __rint weak_alias (__rint, rint) -#ifdef NO_LONG_DOUBLE +# ifdef NO_LONG_DOUBLE strong_alias (__rint, __rintl) weak_alias (__rint, rintl) +# endif #endif diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c b/sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c index 9123fdc..e0e7155 100644 --- a/sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c +++ b/sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c @@ -22,18 +22,10 @@ #include "math.h" #include "math_private.h" -#ifdef __STDC__ static const double huge = 1.0e300; -#else -static double huge = 1.0e300; -#endif -#ifdef __STDC__ - double __ceil(double x) 
-#else - double __ceil(x) - double x; -#endif +double +__ceil(double x) { int64_t i0,i; int32_t j0; @@ -60,8 +52,10 @@ static double huge = 1.0e300; INSERT_WORDS64(x,i0); return x; } +#ifndef __ceil weak_alias (__ceil, ceil) -#ifdef NO_LONG_DOUBLE +# ifdef NO_LONG_DOUBLE strong_alias (__ceil, __ceill) weak_alias (__ceil, ceill) +# endif #endif diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c b/sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c index d52e4db..8b7300b 100644 --- a/sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c +++ b/sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c @@ -72,8 +72,10 @@ __floor (double x) return x+x; /* inf or NaN */ return x; } +#ifndef __floor weak_alias (__floor, floor) -#ifdef NO_LONG_DOUBLE +# ifdef NO_LONG_DOUBLE strong_alias (__floor, __floorl) weak_alias (__floor, floorl) +# endif #endif diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c b/sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c index 4a60aa3..571b381 100644 --- a/sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c +++ b/sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c @@ -1,4 +1,3 @@ -/* @(#)s_rint.c 5.1 93/09/24 */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 
@@ -23,22 +22,14 @@ #include "math.h" #include "math_private.h" -#ifdef __STDC__ static const double -#else -static double -#endif TWO52[2]={ 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */ -4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */ }; -#ifdef __STDC__ - double __rint(double x) -#else - double __rint(x) - double x; -#endif +double +__rint(double x) { int64_t i0,sx; int32_t j0; @@ -72,8 +63,10 @@ TWO52[2]={ double w = TWO52[sx]+x; return w-TWO52[sx]; } +#ifndef __rint weak_alias (__rint, rint) -#ifdef NO_LONG_DOUBLE +# ifdef NO_LONG_DOUBLE strong_alias (__rint, __rintl) weak_alias (__rint, rintl) +# endif #endif diff --git a/sysdeps/ieee754/flt-32/s_ceilf.c b/sysdeps/ieee754/flt-32/s_ceilf.c index 29ccadb..8a83201 100644 --- a/sysdeps/ieee754/flt-32/s_ceilf.c +++ b/sysdeps/ieee754/flt-32/s_ceilf.c @@ -8,30 +8,19 @@ * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice + * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: s_ceilf.c,v 1.4 1995/05/10 20:46:55 jtc Exp $"; -#endif - #include "math.h" #include "math_private.h" -#ifdef __STDC__ + static const float huge = 1.0e30; -#else -static float huge = 1.0e30; -#endif -#ifdef __STDC__ - float __ceilf(float x) -#else - float __ceilf(x) - float x; -#endif +float +__ceilf(float x) { int32_t i0,j0; u_int32_t i; @@ -39,9 +28,9 @@ static float huge = 1.0e30; GET_FLOAT_WORD(i0,x); j0 = ((i0>>23)&0xff)-0x7f; if(j0<23) { - if(j0<0) { /* raise inexact if x != 0 */ + if(j0<0) { /* raise inexact if x != 0 */ if(huge+x>(float)0.0) {/* return 0*sign(x) if |x|<1 */ - if(i0<0) {i0=0x80000000;} + if(i0<0) {i0=0x80000000;} else if(i0!=0) { i0=0x3f800000;} } } else { @@ -53,10 +42,12 @@ static float huge = 1.0e30; } } } else { - if(j0==0x80) return x+x; /* inf or NaN */ + if(__builtin_expect(j0==0x80, 0)) return x+x; /* inf or NaN */ else return x; /* x is integral */ } SET_FLOAT_WORD(x,i0); return x; } +#ifndef __ceilf weak_alias (__ceilf, ceilf) +#endif diff --git a/sysdeps/ieee754/flt-32/s_floorf.c b/sysdeps/ieee754/flt-32/s_floorf.c index e8822b0..dd19c6b 100644 --- a/sysdeps/ieee754/flt-32/s_floorf.c +++ b/sysdeps/ieee754/flt-32/s_floorf.c @@ -8,15 +8,11 @@ * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice + * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: s_floorf.c,v 1.4 1995/05/10 20:47:22 jtc Exp $"; -#endif - /* * floorf(x) * Return x rounded toward -inf to integral value @@ -29,27 +25,19 @@ static char rcsid[] = "$NetBSD: s_floorf.c,v 1.4 1995/05/10 20:47:22 jtc Exp $"; #include "math.h" #include "math_private.h" -#ifdef __STDC__ static const float huge = 1.0e30; -#else -static float huge = 1.0e30; -#endif -#ifdef __STDC__ - float __floorf(float x) -#else - float __floorf(x) - float x; -#endif +float +__floorf(float x) { int32_t i0,j0; u_int32_t i; GET_FLOAT_WORD(i0,x); j0 = ((i0>>23)&0xff)-0x7f; if(j0<23) { - if(j0<0) { /* raise inexact if x != 0 */ + if(j0<0) { /* raise inexact if x != 0 */ if(huge+x>(float)0.0) {/* return 0*sign(x) if |x|<1 */ - if(i0>=0) {i0=0;} + if(i0>=0) {i0=0;} else if((i0&0x7fffffff)!=0) { i0=0xbf800000;} } @@ -62,10 +50,12 @@ static float huge = 1.0e30; } } } else { - if(j0==0x80) return x+x; /* inf or NaN */ + if(__builtin_expect(j0==0x80, 0)) return x+x; /* inf or NaN */ else return x; /* x is integral */ } SET_FLOAT_WORD(x,i0); return x; } +#ifndef __floorf weak_alias (__floorf, floorf) +#endif diff --git a/sysdeps/ieee754/flt-32/s_rintf.c b/sysdeps/ieee754/flt-32/s_rintf.c index 4e5b409..9ea9b6f 100644 --- a/sysdeps/ieee754/flt-32/s_rintf.c +++ b/sysdeps/ieee754/flt-32/s_rintf.c @@ -8,34 +8,22 @@ * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice + * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: s_rintf.c,v 1.4 1995/05/10 20:48:06 jtc Exp $"; -#endif - #include "math.h" #include "math_private.h" -#ifdef __STDC__ static const float -#else -static float -#endif TWO23[2]={ 8.3886080000e+06, /* 0x4b000000 */ -8.3886080000e+06, /* 0xcb000000 */ }; -#ifdef __STDC__ - float __rintf(float x) -#else - float __rintf(x) - float x; -#endif +float +__rintf(float x) { int32_t i0,j0,sx; u_int32_t i,i1; @@ -44,17 +32,17 @@ TWO23[2]={ sx = (i0>>31)&1; j0 = ((i0>>23)&0xff)-0x7f; if(j0<23) { - if(j0<0) { + if(j0<0) { if((i0&0x7fffffff)==0) return x; i1 = (i0&0x07fffff); i0 &= 0xfff00000; i0 |= ((i1|-i1)>>9)&0x400000; SET_FLOAT_WORD(x,i0); - w = TWO23[sx]+x; - t = w-TWO23[sx]; + w = TWO23[sx]+x; + t = w-TWO23[sx]; GET_FLOAT_WORD(i0,t); SET_FLOAT_WORD(t,(i0&0x7fffffff)|(sx<<31)); - return t; + return t; } else { i = (0x007fffff)>>j0; if((i0&i)==0) return x; /* x is integral */ @@ -69,4 +57,6 @@ TWO23[2]={ w = TWO23[sx]+x; return w-TWO23[sx]; } +#ifndef __rintf weak_alias (__rintf, rintf) +#endif diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index 6d66ff6..1068af6 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -207,6 +207,10 @@ _dl_start_user:\n\ /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ #define ELF_MACHINE_JMP_SLOT R_X86_64_JUMP_SLOT +/* The relative ifunc relocation. */ +// XXX This is a work-around for a broken linker. Remove! +#define ELF_MACHINE_IRELATIVE R_X86_64_IRELATIVE + /* The x86-64 never uses Elf64_Rel relocations. 
*/ #define ELF_MACHINE_NO_REL 1 diff --git a/sysdeps/x86_64/fpu/bits/mathinline.h b/sysdeps/x86_64/fpu/bits/mathinline.h index 5bdf47e..c3e03e8 100644 --- a/sysdeps/x86_64/fpu/bits/mathinline.h +++ b/sysdeps/x86_64/fpu/bits/mathinline.h @@ -30,32 +30,34 @@ #endif -#if defined __USE_ISOC99 && defined __GNUC__ && __GNUC__ >= 2 +#if defined __GNUC__ && __GNUC__ >= 2 +# ifdef __USE_ISOC99 +__BEGIN_NAMESPACE_C99 /* Test for negative number. Used in the signbit() macro. */ __MATH_INLINE int __NTH (__signbitf (float __x)) { -# if __WORDSIZE == 32 +# if __WORDSIZE == 32 __extension__ union { float __f; int __i; } __u = { __f: __x }; return __u.__i < 0; -# else +# else int __m; __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x)); return __m & 0x8; -# endif +# endif } __MATH_INLINE int __NTH (__signbit (double __x)) { -# if __WORDSIZE == 32 +# if __WORDSIZE == 32 __extension__ union { double __d; int __i[2]; } __u = { __d: __x }; return __u.__i[1] < 0; -# else +# else int __m; __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x)); return __m & 0x80; -# endif +# endif } __MATH_INLINE int __NTH (__signbitl (long double __x)) @@ -64,9 +66,6 @@ __NTH (__signbitl (long double __x)) return (__u.__i[2] & 0x8000) != 0; } -#ifdef __USE_ISOC99 -__BEGIN_NAMESPACE_C99 - /* Round to nearest integer. */ # if __WORDSIZE == 64 || defined __SSE_MATH__ __MATH_INLINE long int @@ -101,10 +100,14 @@ __NTH (llrint (double __x)) __asm ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x)); return __res; } + +__END_NAMESPACE_C99 # endif # if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \ && (__WORDSIZE == 64 || defined __SSE2_MATH__) +__BEGIN_NAMESPACE_C99 + /* Determine maximum of two values. 
*/ __MATH_INLINE float __NTH (fmaxf (float __x, float __y)) @@ -137,4 +140,70 @@ __NTH (fmin (double __x, double __y)) __END_NAMESPACE_C99 # endif +# if defined __SSE4_1__ && (__WORDSIZE == 64 || defined __SSE2_MATH__) +# if defined __USE_MISC || defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99 +__BEGIN_NAMESPACE_C99 + +/* Round to nearest integer. */ +__MATH_INLINE double +__NTH (rint (double __x)) +{ + double __res; + __asm ("roundsd $4, %1, %0" : "=x" (__res) : "x" (__x)); + return __res; +} +__MATH_INLINE float +__NTH (rintf (float __x)) +{ + float __res; + __asm ("roundss $4, %1, %0" : "=x" (__res) : "x" (__x)); + return __res; +} + +__END_NAMESPACE_C99 +# endif + +__BEGIN_NAMESPACE_STD +/* Smallest integral value not less than X. */ +__MATH_INLINE double +__NTH (ceil (double __x)) +{ + double __res; + __asm ("roundsd $2, %1, %0" : "=x" (__res) : "x" (__x)); + return __res; +} +__END_NAMESPACE_STD + +__BEGIN_NAMESPACE_C99 +__MATH_INLINE float +__NTH (ceilf (float __x)) +{ + float __res; + __asm ("roundss $2, %1, %0" : "=x" (__res) : "x" (__x)); + return __res; +} +__END_NAMESPACE_C99 + +__BEGIN_NAMESPACE_STD +/* Largest integer not greater than X. 
*/ +__MATH_INLINE double +__NTH (floor (double __x)) +{ + double __res; + __asm ("roundsd $1, %1, %0" : "=x" (__res) : "x" (__x)); /* $1: same rounding immediate as __floor_sse41. */ + return __res; +} +__END_NAMESPACE_STD + +__BEGIN_NAMESPACE_C99 +__MATH_INLINE float +__NTH (floorf (float __x)) +{ + float __res; + __asm ("roundss $1, %1, %0" : "=x" (__res) : "x" (__x)); /* $1: same rounding immediate as __floorf_sse41. */ + return __res; +} +__END_NAMESPACE_C99 +# endif + #endif diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile new file mode 100644 index 0000000..b29feed --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -0,0 +1,4 @@ +ifeq ($(subdir),math) +libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \ + s_rint-c s_rintf-c +endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-c.c b/sysdeps/x86_64/fpu/multiarch/s_ceil-c.c new file mode 100644 index 0000000..6a5ea3f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-c.c @@ -0,0 +1,2 @@ +#define __ceil __ceil_c +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.S b/sysdeps/x86_64/fpu/multiarch/s_ceil.S new file mode 100644 index 0000000..d0f8da3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.S @@ -0,0 +1,40 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include + + +ENTRY(__ceil) + .type __ceil, @gnu_indirect_function + call __get_cpu_features@plt + movq %rax, %rdx + leaq __ceil_sse41(%rip), %rax + testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + jnz 2f + leaq __ceil_c(%rip), %rax +2: ret +END(__ceil) +weak_alias (__ceil, ceil) + + +ENTRY(__ceil_sse41) + roundsd $2, %xmm0, %xmm0 + ret +END(__ceil_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-c.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf-c.c new file mode 100644 index 0000000..229a627 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-c.c @@ -0,0 +1,2 @@ +#define __ceilf __ceilf_c +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf.S new file mode 100644 index 0000000..65ce252 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.S @@ -0,0 +1,40 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include +#include + + +ENTRY(__ceilf) + .type __ceilf, @gnu_indirect_function + call __get_cpu_features@plt + movq %rax, %rdx + leaq __ceilf_sse41(%rip), %rax + testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + jnz 2f + leaq __ceilf_c(%rip), %rax +2: ret +END(__ceilf) +weak_alias (__ceilf, ceilf) + + +ENTRY(__ceilf_sse41) + roundss $2, %xmm0, %xmm0 + ret +END(__ceilf_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-c.c b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c new file mode 100644 index 0000000..8b8c31d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c @@ -0,0 +1,2 @@ +#define __floor __floor_c +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.S b/sysdeps/x86_64/fpu/multiarch/s_floor.S new file mode 100644 index 0000000..514ea95 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_floor.S @@ -0,0 +1,40 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include +#include + + +ENTRY(__floor) + .type __floor, @gnu_indirect_function + call __get_cpu_features@plt + movq %rax, %rdx + leaq __floor_sse41(%rip), %rax + testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + jnz 2f + leaq __floor_c(%rip), %rax +2: ret +END(__floor) +weak_alias (__floor, floor) + + +ENTRY(__floor_sse41) + roundsd $1, %xmm0, %xmm0 + ret +END(__floor_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c b/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c new file mode 100644 index 0000000..3f36786 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c @@ -0,0 +1,2 @@ +#define __floorf __floorf_c +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.S b/sysdeps/x86_64/fpu/multiarch/s_floorf.S new file mode 100644 index 0000000..d8cd56e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.S @@ -0,0 +1,40 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include +#include + + +ENTRY(__floorf) + .type __floorf, @gnu_indirect_function + call __get_cpu_features@plt + movq %rax, %rdx + leaq __floorf_sse41(%rip), %rax + testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + jnz 2f + leaq __floorf_c(%rip), %rax +2: ret +END(__floorf) +weak_alias (__floorf, floorf) + + +ENTRY(__floorf_sse41) + roundss $1, %xmm0, %xmm0 + ret +END(__floorf_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-c.c b/sysdeps/x86_64/fpu/multiarch/s_rint-c.c new file mode 100644 index 0000000..f29f45b --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_rint-c.c @@ -0,0 +1,2 @@ +#define __rint __rint_c +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.S b/sysdeps/x86_64/fpu/multiarch/s_rint.S new file mode 100644 index 0000000..75beffa --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_rint.S @@ -0,0 +1,40 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include +#include + + +ENTRY(__rint) + .type __rint, @gnu_indirect_function + call __get_cpu_features@plt + movq %rax, %rdx + leaq __rint_sse41(%rip), %rax + testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + jnz 2f + leaq __rint_c(%rip), %rax +2: ret +END(__rint) +weak_alias (__rint, rint) + + +ENTRY(__rint_sse41) + roundsd $4, %xmm0, %xmm0 + ret +END(__rint_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c b/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c new file mode 100644 index 0000000..30ed42a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c @@ -0,0 +1,2 @@ +#define __rintf __rintf_c +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.S b/sysdeps/x86_64/fpu/multiarch/s_rintf.S new file mode 100644 index 0000000..512d28c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.S @@ -0,0 +1,40 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 2011. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +#include +#include + + +ENTRY(__rintf) + .type __rintf, @gnu_indirect_function + call __get_cpu_features@plt + movq %rax, %rdx + leaq __rintf_sse41(%rip), %rax + testl $bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx) + jnz 2f + leaq __rintf_c(%rip), %rax +2: ret +END(__rintf) +weak_alias (__rintf, rintf) + + +ENTRY(__rintf_sse41) + roundss $4, %xmm0, %xmm0 + ret +END(__rintf_sse41) -- 2.7.4