From ad0f5cad15f1c76faf3843b3e189dead2c05cfcc Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@gmail.com>
Date: Sun, 16 Oct 2011 20:58:17 -0400
Subject: [PATCH] Use rounds{s,d} for x86 rint, ceil, floor

---
 ChangeLog                                    | 29 +++++++++
 elf/do-rel.h                                 | 73 +++++++++++++++++++++--
 sysdeps/ieee754/dbl-64/s_ceil.c              | 20 ++-----
 sysdeps/ieee754/dbl-64/s_floor.c             | 14 ++---
 sysdeps/ieee754/dbl-64/s_rint.c              | 26 +++-----
 sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c  | 16 ++---
 sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c |  4 +-
 sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c  | 17 ++----
 sysdeps/ieee754/flt-32/s_ceilf.c             | 27 +++------
 sysdeps/ieee754/flt-32/s_floorf.c            | 26 +++-----
 sysdeps/ieee754/flt-32/s_rintf.c             | 28 +++------
 sysdeps/x86_64/dl-machine.h                  |  4 ++
 sysdeps/x86_64/fpu/bits/mathinline.h         | 89 ++++++++++++++++++++++++----
 sysdeps/x86_64/fpu/multiarch/Makefile        |  4 ++
 sysdeps/x86_64/fpu/multiarch/s_ceil-c.c      |  2 +
 sysdeps/x86_64/fpu/multiarch/s_ceil.S        | 40 +++++++++++++
 sysdeps/x86_64/fpu/multiarch/s_ceilf-c.c     |  2 +
 sysdeps/x86_64/fpu/multiarch/s_ceilf.S       | 40 +++++++++++++
 sysdeps/x86_64/fpu/multiarch/s_floor-c.c     |  2 +
 sysdeps/x86_64/fpu/multiarch/s_floor.S       | 40 +++++++++++++
 sysdeps/x86_64/fpu/multiarch/s_floorf-c.c    |  2 +
 sysdeps/x86_64/fpu/multiarch/s_floorf.S      | 40 +++++++++++++
 sysdeps/x86_64/fpu/multiarch/s_rint-c.c      |  2 +
 sysdeps/x86_64/fpu/multiarch/s_rint.S        | 40 +++++++++++++
 sysdeps/x86_64/fpu/multiarch/s_rintf-c.c     |  2 +
 sysdeps/x86_64/fpu/multiarch/s_rintf.S       | 40 +++++++++++++
 26 files changed, 495 insertions(+), 134 deletions(-)
 create mode 100644 sysdeps/x86_64/fpu/multiarch/Makefile
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceil-c.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceil.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceilf-c.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_ceilf.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floor-c.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floor.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floorf-c.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_floorf.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rint-c.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rint.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rintf-c.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_rintf.S

diff --git a/ChangeLog b/ChangeLog
index 9a792f7..c71959c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,34 @@
 2011-10-16  Ulrich Drepper  <drepper@gmail.com>
 
+	* sysdeps/ieee754/dbl-64/s_ceil.c: Omit aliases if function is renamed.
+	* sysdeps/ieee754/dbl-64/s_floor.c: Likewise.
+	* sysdeps/ieee754/dbl-64/s_rint.c: Likewise.
+	* sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c: Likewise.
+	* sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c: Likewise.
+	* sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c: Likewise.
+	* sysdeps/ieee754/flt-32/s_ceilf.c: Likewise.
+	* sysdeps/ieee754/flt-32/s_floorf.c: Likewise.
+	* sysdeps/ieee754/flt-32/s_rintf.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/Makefile: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_ceil-c.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_ceil.S: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_ceilf-c.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_ceilf.S: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_floor-c.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_floor.S: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_floorf-c.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_floorf.S: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_rint-c.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_rint.S: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_rintf-c.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_rintf.S: New file.
+
+	* sysdeps/x86_64/fpu/bits/mathinline.h: Add inlines for rint, rintf,
+	ceil, ceilf, floor, floorf.
+
+	* elf/do-rel.h (elf_dynamic_do_Rel): Work around linker problem.
+	Perform IRELATIVE relocations last.
+
 	* elf/do-rel.h: Add another parameter nrelative, replacing the
 	local variable with the same name.  Change name of the function
 	to end in Rel or Rela (uppercase).
diff --git a/elf/do-rel.h b/elf/do-rel.h
index 05c03f7..3f8e7eb 100644
--- a/elf/do-rel.h
+++ b/elf/do-rel.h
@@ -55,6 +55,10 @@ elf_dynamic_do_Rel (struct link_map *map,
   const ElfW(Rel) *r = (const void *) reladdr;
   const ElfW(Rel) *end = (const void *) (reladdr + relsize);
   ElfW(Addr) l_addr = map->l_addr;
+# if defined ELF_MACHINE_IRELATIVE && !defined RTLD_BOOTSTRAP
+  const ElfW(Rel) *r2 = NULL;
+  const ElfW(Rel) *end2 = NULL;
+# endif
 
 #if (!defined DO_RELA || !defined ELF_MACHINE_PLT_REL) && !defined RTLD_BOOTSTRAP
   /* We never bind lazily during ld.so bootstrap.  Unfortunately gcc is
@@ -64,7 +68,23 @@ elf_dynamic_do_Rel (struct link_map *map,
     {
       /* Doing lazy PLT relocations; they need very little info.  */
       for (; r < end; ++r)
-	elf_machine_lazy_rel (map, l_addr, r, skip_ifunc);
+# ifdef ELF_MACHINE_IRELATIVE
+	if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE)
+	  {
+	    if (r2 == NULL)
+	      r2 = r;
+	    end2 = r;
+	  }
+	else
+# endif
+	  elf_machine_lazy_rel (map, l_addr, r, skip_ifunc);
+
+# ifdef ELF_MACHINE_IRELATIVE
+      if (r2 != NULL)
+	for (; r2 <= end2; ++r2)
+	  if (ELFW(R_TYPE) (r2->r_info) == ELF_MACHINE_IRELATIVE)
+	    elf_machine_lazy_rel (map, l_addr, r2, skip_ifunc);
+# endif
     }
   else
 #endif
@@ -112,17 +132,62 @@ elf_dynamic_do_Rel (struct link_map *map,
 
 	  for (; r < end; ++r)
 	    {
+#if defined ELF_MACHINE_IRELATIVE && !defined RTLD_BOOTSTRAP
+	      if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE)
+		{
+		  if (r2 == NULL)
+		    r2 = r;
+		  end2 = r;
+		  continue;
+		}
+#endif
+
 	      ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff;
 	      elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)],
 			       &map->l_versions[ndx],
 			       (void *) (l_addr + r->r_offset), skip_ifunc);
 	    }
+
+#if defined ELF_MACHINE_IRELATIVE && !defined RTLD_BOOTSTRAP
+	  if (r2 != NULL)
+	    for (; r2 <= end2; ++r2)
+	      if (ELFW(R_TYPE) (r2->r_info) == ELF_MACHINE_IRELATIVE)
+		{
+		  ElfW(Half) ndx
+		    = version[ELFW(R_SYM) (r2->r_info)] & 0x7fff;
+		  elf_machine_rel (map, r2,
+				   &symtab[ELFW(R_SYM) (r2->r_info)],
+				   &map->l_versions[ndx],
+				   (void *) (l_addr + r2->r_offset),
+				   skip_ifunc);
+		}
+#endif
 	}
 #ifndef RTLD_BOOTSTRAP
       else
-	for (; r < end; ++r)
-	  elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL,
-			   (void *) (l_addr + r->r_offset), skip_ifunc);
+	{
+	  for (; r < end; ++r)
+# ifdef ELF_MACHINE_IRELATIVE
+	    if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE)
+	      {
+		if (r2 == NULL)
+		  r2 = r;
+		end2 = r;
+	      }
+	    else
+# endif
+	      elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL,
+			       (void *) (l_addr + r->r_offset), skip_ifunc);
+
+# ifdef ELF_MACHINE_IRELATIVE
+	  if (r2 != NULL)
+	    for (; r2 <= end2; ++r2)
+	      if (ELFW(R_TYPE) (r2->r_info) == ELF_MACHINE_IRELATIVE)
+		elf_machine_rel (map, r2, &symtab[ELFW(R_SYM) (r2->r_info)],
+				 NULL, (void *) (l_addr + r2->r_offset),
+				 skip_ifunc);
+# endif
+	}
 #endif
     }
 }
diff --git a/sysdeps/ieee754/dbl-64/s_ceil.c b/sysdeps/ieee754/dbl-64/s_ceil.c
index 1b352a6..695cae5 100644
--- a/sysdeps/ieee754/dbl-64/s_ceil.c
+++ b/sysdeps/ieee754/dbl-64/s_ceil.c
@@ -10,10 +10,6 @@
  * ====================================================
  */
 
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: s_ceil.c,v 1.8 1995/05/10 20:46:53 jtc Exp $";
-#endif
-
 /*
  * ceil(x)
  * Return x rounded toward -inf to integral value
@@ -26,18 +22,10 @@ static char rcsid[] = "$NetBSD: s_ceil.c,v 1.8 1995/05/10 20:46:53 jtc Exp $";
 #include "math.h"
 #include "math_private.h"
 
-#ifdef __STDC__
 static const double huge = 1.0e300;
-#else
-static double huge = 1.0e300;
-#endif
 
-#ifdef __STDC__
-	double __ceil(double x)
-#else
-	double __ceil(x)
-	double x;
-#endif
+double
+__ceil(double x)
 {
 	int32_t i0,i1,j0;
 	u_int32_t i,j;
@@ -78,8 +66,10 @@ static double huge = 1.0e300;
 	INSERT_WORDS(x,i0,i1);
 	return x;
 }
+#ifndef __ceil
 weak_alias (__ceil, ceil)
-#ifdef NO_LONG_DOUBLE
+# ifdef NO_LONG_DOUBLE
 strong_alias (__ceil, __ceill)
 weak_alias (__ceil, ceill)
+# endif
 #endif
diff --git a/sysdeps/ieee754/dbl-64/s_floor.c b/sysdeps/ieee754/dbl-64/s_floor.c
index 77db9ef..5b593ca 100644
--- a/sysdeps/ieee754/dbl-64/s_floor.c
+++ b/sysdeps/ieee754/dbl-64/s_floor.c
@@ -10,10 +10,6 @@
  * ====================================================
  */
 
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: s_floor.c,v 1.8 1995/05/10 20:47:20 jtc Exp $";
-#endif
-
 /*
  * floor(x)
  * Return x rounded toward -inf to integral value
@@ -44,7 +40,7 @@ static double huge = 1.0e300;
 	EXTRACT_WORDS(i0,i1,x);
 	j0 = ((i0>>20)&0x7ff)-0x3ff;
 	if(j0<20) {
-	    if(j0<0) { 	/* raise inexact if x != 0 */
+	    if(j0<0) {	/* raise inexact if x != 0 */
 		if(huge+x>0.0) {/* return 0*sign(x) if |x|<1 */
 		    if(i0>=0) {i0=i1=0;}
 		    else if(((i0&0x7fffffff)|i1)!=0)
@@ -64,12 +60,12 @@ static double huge = 1.0e300;
 	} else {
 	    i = ((u_int32_t)(0xffffffff))>>(j0-20);
 	    if((i1&i)==0) return x;	/* x is integral */
-	    if(huge+x>0.0) { 		/* raise inexact flag */
+	    if(huge+x>0.0) {		/* raise inexact flag */
 		if(i0<0) {
 		    if(j0==20) i0+=1;
 		    else {
 			j = i1+(1<<(52-j0));
-			if(j<i1) i0 +=1 ; 	/* got a carry */
+			if(j<i1) i0 +=1 ;	/* got a carry */
 			i1=j;
 		    }
 		}
@@ -79,8 +75,10 @@ static double huge = 1.0e300;
 	INSERT_WORDS(x,i0,i1);
 	return x;
 }
+#ifndef __floor
 weak_alias (__floor, floor)
-#ifdef NO_LONG_DOUBLE
+# ifdef NO_LONG_DOUBLE
 strong_alias (__floor, __floorl)
 weak_alias (__floor, floorl)
+# endif
 #endif
diff --git a/sysdeps/ieee754/dbl-64/s_rint.c b/sysdeps/ieee754/dbl-64/s_rint.c
index 4e6381e..a671a62 100644
--- a/sysdeps/ieee754/dbl-64/s_rint.c
+++ b/sysdeps/ieee754/dbl-64/s_rint.c
@@ -10,10 +10,6 @@
  * ====================================================
  */
 
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: s_rint.c,v 1.8 1995/05/10 20:48:04 jtc Exp $";
-#endif
-
 /*
  * rint(x)
  * Return x rounded to integral value according to the prevailing
@@ -27,22 +23,14 @@ static char rcsid[] = "$NetBSD: s_rint.c,v 1.8 1995/05/10 20:48:04 jtc Exp $";
 #include "math.h"
 #include "math_private.h"
 
-#ifdef __STDC__
 static const double
-#else
-static double
-#endif
 TWO52[2]={
   4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
  -4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */
 };
 
-#ifdef __STDC__
-	double __rint(double x)
-#else
-	double __rint(x)
-	double x;
-#endif
+double
+__rint(double x)
 {
 	int32_t i0,j0,sx;
 	u_int32_t i,i1;
@@ -57,11 +45,11 @@ TWO52[2]={
 		i0 &= 0xfffe0000;
 		i0 |= ((i1|-i1)>>12)&0x80000;
 		SET_HIGH_WORD(x,i0);
-	        w = TWO52[sx]+x;
-	        t =  w-TWO52[sx];
+		w = TWO52[sx]+x;
+		t =  w-TWO52[sx];
 		GET_HIGH_WORD(i0,t);
 		SET_HIGH_WORD(t,(i0&0x7fffffff)|(sx<<31));
-	        return t;
+		return t;
 	    } else {
 		i = (0x000fffff)>>j0;
 		if(((i0&i)|i1)==0) return x; /* x is integral */
@@ -91,8 +79,10 @@ TWO52[2]={
 	w = TWO52[sx]+x;
 	return w-TWO52[sx];
 }
+#ifndef __rint
 weak_alias (__rint, rint)
-#ifdef NO_LONG_DOUBLE
+# ifdef NO_LONG_DOUBLE
 strong_alias (__rint, __rintl)
 weak_alias (__rint, rintl)
+# endif
 #endif
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c b/sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c
index 9123fdc..e0e7155 100644
--- a/sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c
@@ -22,18 +22,10 @@
 #include "math.h"
 #include "math_private.h"
 
-#ifdef __STDC__
 static const double huge = 1.0e300;
-#else
-static double huge = 1.0e300;
-#endif
 
-#ifdef __STDC__
-	double __ceil(double x)
-#else
-	double __ceil(x)
-	double x;
-#endif
+double
+__ceil(double x)
 {
 	int64_t i0,i;
 	int32_t j0;
@@ -60,8 +52,10 @@ static double huge = 1.0e300;
 	INSERT_WORDS64(x,i0);
 	return x;
 }
+#ifndef __ceil
 weak_alias (__ceil, ceil)
-#ifdef NO_LONG_DOUBLE
+# ifdef NO_LONG_DOUBLE
 strong_alias (__ceil, __ceill)
 weak_alias (__ceil, ceill)
+# endif
 #endif
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c b/sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c
index d52e4db..8b7300b 100644
--- a/sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c
@@ -72,8 +72,10 @@ __floor (double x)
 	    return x+x;	/* inf or NaN */
 	return x;
 }
+#ifndef __floor
 weak_alias (__floor, floor)
-#ifdef NO_LONG_DOUBLE
+# ifdef NO_LONG_DOUBLE
 strong_alias (__floor, __floorl)
 weak_alias (__floor, floorl)
+# endif
 #endif
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c b/sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c
index 4a60aa3..571b381 100644
--- a/sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c
@@ -1,4 +1,3 @@
-/* @(#)s_rint.c 5.1 93/09/24 */
 /*
  * ====================================================
  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
@@ -23,22 +22,14 @@
 #include "math.h"
 #include "math_private.h"
 
-#ifdef __STDC__
 static const double
-#else
-static double
-#endif
 TWO52[2]={
   4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
  -4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */
 };
 
-#ifdef __STDC__
-	double __rint(double x)
-#else
-	double __rint(x)
-	double x;
-#endif
+double
+__rint(double x)
 {
 	int64_t i0,sx;
 	int32_t j0;
@@ -72,8 +63,10 @@ TWO52[2]={
 	double w = TWO52[sx]+x;
 	return w-TWO52[sx];
 }
+#ifndef __rint
 weak_alias (__rint, rint)
-#ifdef NO_LONG_DOUBLE
+# ifdef NO_LONG_DOUBLE
 strong_alias (__rint, __rintl)
 weak_alias (__rint, rintl)
+# endif
 #endif
diff --git a/sysdeps/ieee754/flt-32/s_ceilf.c b/sysdeps/ieee754/flt-32/s_ceilf.c
index 29ccadb..8a83201 100644
--- a/sysdeps/ieee754/flt-32/s_ceilf.c
+++ b/sysdeps/ieee754/flt-32/s_ceilf.c
@@ -8,30 +8,19 @@
  *
  * Developed at SunPro, a Sun Microsystems, Inc. business.
  * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice 
+ * software is freely granted, provided that this notice
  * is preserved.
  * ====================================================
  */
 
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: s_ceilf.c,v 1.4 1995/05/10 20:46:55 jtc Exp $";
-#endif
-
 #include "math.h"
 #include "math_private.h"
 
-#ifdef __STDC__
+
 static const float huge = 1.0e30;
-#else
-static float huge = 1.0e30;
-#endif
 
-#ifdef __STDC__
-	float __ceilf(float x)
-#else
-	float __ceilf(x)
-	float x;
-#endif
+float
+__ceilf(float x)
 {
 	int32_t i0,j0;
 	u_int32_t i;
@@ -39,9 +28,9 @@ static float huge = 1.0e30;
 	GET_FLOAT_WORD(i0,x);
 	j0 = ((i0>>23)&0xff)-0x7f;
 	if(j0<23) {
-	    if(j0<0) { 	/* raise inexact if x != 0 */
+	    if(j0<0) {	/* raise inexact if x != 0 */
 		if(huge+x>(float)0.0) {/* return 0*sign(x) if |x|<1 */
-		    if(i0<0) {i0=0x80000000;} 
+		    if(i0<0) {i0=0x80000000;}
 		    else if(i0!=0) { i0=0x3f800000;}
 		}
 	    } else {
@@ -53,10 +42,12 @@ static float huge = 1.0e30;
 		}
 	    }
 	} else {
-	    if(j0==0x80) return x+x;	/* inf or NaN */
+	    if(__builtin_expect(j0==0x80, 0)) return x+x; /* inf or NaN */
 	    else return x;		/* x is integral */
 	}
 	SET_FLOAT_WORD(x,i0);
 	return x;
 }
+#ifndef __ceilf
 weak_alias (__ceilf, ceilf)
+#endif
diff --git a/sysdeps/ieee754/flt-32/s_floorf.c b/sysdeps/ieee754/flt-32/s_floorf.c
index e8822b0..dd19c6b 100644
--- a/sysdeps/ieee754/flt-32/s_floorf.c
+++ b/sysdeps/ieee754/flt-32/s_floorf.c
@@ -8,15 +8,11 @@
  *
  * Developed at SunPro, a Sun Microsystems, Inc. business.
  * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice 
+ * software is freely granted, provided that this notice
  * is preserved.
  * ====================================================
  */
 
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: s_floorf.c,v 1.4 1995/05/10 20:47:22 jtc Exp $";
-#endif
-
 /*
  * floorf(x)
  * Return x rounded toward -inf to integral value
@@ -29,27 +25,19 @@ static char rcsid[] = "$NetBSD: s_floorf.c,v 1.4 1995/05/10 20:47:22 jtc Exp $";
 #include "math.h"
 #include "math_private.h"
 
-#ifdef __STDC__
 static const float huge = 1.0e30;
-#else
-static float huge = 1.0e30;
-#endif
 
-#ifdef __STDC__
-	float __floorf(float x)
-#else
-	float __floorf(x)
-	float x;
-#endif
+float
+__floorf(float x)
 {
 	int32_t i0,j0;
 	u_int32_t i;
 	GET_FLOAT_WORD(i0,x);
 	j0 = ((i0>>23)&0xff)-0x7f;
 	if(j0<23) {
-	    if(j0<0) { 	/* raise inexact if x != 0 */
+	    if(j0<0) {	/* raise inexact if x != 0 */
 		if(huge+x>(float)0.0) {/* return 0*sign(x) if |x|<1 */
-		    if(i0>=0) {i0=0;} 
+		    if(i0>=0) {i0=0;}
 		    else if((i0&0x7fffffff)!=0)
 			{ i0=0xbf800000;}
 		}
@@ -62,10 +50,12 @@ static float huge = 1.0e30;
 		}
 	    }
 	} else {
-	    if(j0==0x80) return x+x;	/* inf or NaN */
+	    if(__builtin_expect(j0==0x80, 0)) return x+x; /* inf or NaN */
 	    else return x;		/* x is integral */
 	}
 	SET_FLOAT_WORD(x,i0);
 	return x;
 }
+#ifndef __floorf
 weak_alias (__floorf, floorf)
+#endif
diff --git a/sysdeps/ieee754/flt-32/s_rintf.c b/sysdeps/ieee754/flt-32/s_rintf.c
index 4e5b409..9ea9b6f 100644
--- a/sysdeps/ieee754/flt-32/s_rintf.c
+++ b/sysdeps/ieee754/flt-32/s_rintf.c
@@ -8,34 +8,22 @@
  *
  * Developed at SunPro, a Sun Microsystems, Inc. business.
  * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice 
+ * software is freely granted, provided that this notice
  * is preserved.
  * ====================================================
  */
 
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: s_rintf.c,v 1.4 1995/05/10 20:48:06 jtc Exp $";
-#endif
-
 #include "math.h"
 #include "math_private.h"
 
-#ifdef __STDC__
 static const float
-#else
-static float 
-#endif
 TWO23[2]={
   8.3886080000e+06, /* 0x4b000000 */
  -8.3886080000e+06, /* 0xcb000000 */
 };
 
-#ifdef __STDC__
-	float __rintf(float x)
-#else
-	float __rintf(x)
-	float x;
-#endif
+float
+__rintf(float x)
 {
 	int32_t i0,j0,sx;
 	u_int32_t i,i1;
@@ -44,17 +32,17 @@ TWO23[2]={
 	sx = (i0>>31)&1;
 	j0 = ((i0>>23)&0xff)-0x7f;
 	if(j0<23) {
-	    if(j0<0) { 	
+	    if(j0<0) {
 		if((i0&0x7fffffff)==0) return x;
 		i1 = (i0&0x07fffff);
 		i0 &= 0xfff00000;
 		i0 |= ((i1|-i1)>>9)&0x400000;
 		SET_FLOAT_WORD(x,i0);
-	        w = TWO23[sx]+x;
-	        t =  w-TWO23[sx];
+		w = TWO23[sx]+x;
+		t =  w-TWO23[sx];
 		GET_FLOAT_WORD(i0,t);
 		SET_FLOAT_WORD(t,(i0&0x7fffffff)|(sx<<31));
-	        return t;
+		return t;
 	    } else {
 		i = (0x007fffff)>>j0;
 		if((i0&i)==0) return x; /* x is integral */
@@ -69,4 +57,6 @@ TWO23[2]={
 	w = TWO23[sx]+x;
 	return w-TWO23[sx];
 }
+#ifndef __rintf
 weak_alias (__rintf, rintf)
+#endif
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index 6d66ff6..1068af6 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -207,6 +207,10 @@ _dl_start_user:\n\
 /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries.  */
 #define ELF_MACHINE_JMP_SLOT	R_X86_64_JUMP_SLOT
 
+/* The relative ifunc relocation.  */
+// XXX This is a work-around for a broken linker.  Remove!
+#define ELF_MACHINE_IRELATIVE	R_X86_64_IRELATIVE
+
 /* The x86-64 never uses Elf64_Rel relocations.  */
 #define ELF_MACHINE_NO_REL 1
 
diff --git a/sysdeps/x86_64/fpu/bits/mathinline.h b/sysdeps/x86_64/fpu/bits/mathinline.h
index 5bdf47e..c3e03e8 100644
--- a/sysdeps/x86_64/fpu/bits/mathinline.h
+++ b/sysdeps/x86_64/fpu/bits/mathinline.h
@@ -30,32 +30,34 @@
 #endif
 
 
-#if defined __USE_ISOC99 && defined __GNUC__ && __GNUC__ >= 2
+#if defined __GNUC__ && __GNUC__ >= 2
+# ifdef __USE_ISOC99
+__BEGIN_NAMESPACE_C99
 
 /* Test for negative number.  Used in the signbit() macro.  */
 __MATH_INLINE int
 __NTH (__signbitf (float __x))
 {
-# if __WORDSIZE == 32
+#  if __WORDSIZE == 32
   __extension__ union { float __f; int __i; } __u = { __f: __x };
   return __u.__i < 0;
-# else
+#  else
   int __m;
   __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x));
   return __m & 0x8;
-# endif
+#  endif
 }
 __MATH_INLINE int
 __NTH (__signbit (double __x))
 {
-# if __WORDSIZE == 32
+#  if __WORDSIZE == 32
   __extension__ union { double __d; int __i[2]; } __u = { __d: __x };
   return __u.__i[1] < 0;
-# else
+#  else
   int __m;
   __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x));
   return __m & 0x80;
-# endif
+#  endif
 }
 __MATH_INLINE int
 __NTH (__signbitl (long double __x))
@@ -64,9 +66,6 @@ __NTH (__signbitl (long double __x))
   return (__u.__i[2] & 0x8000) != 0;
 }
 
-#ifdef __USE_ISOC99
-__BEGIN_NAMESPACE_C99
-
 /* Round to nearest integer.  */
 #  if __WORDSIZE == 64 || defined __SSE_MATH__
 __MATH_INLINE long int
@@ -101,10 +100,14 @@ __NTH (llrint (double __x))
   __asm ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x));
   return __res;
 }
+
+__END_NAMESPACE_C99
 #  endif
 
 #  if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \
       && (__WORDSIZE == 64 || defined __SSE2_MATH__)
+__BEGIN_NAMESPACE_C99
+
 /* Determine maximum of two values.  */
 __MATH_INLINE float
 __NTH (fmaxf (float __x, float __y))
@@ -137,4 +140,70 @@ __NTH (fmin (double __x, double __y))
 __END_NAMESPACE_C99
 # endif
 
+# if defined __SSE4_1__ && (__WORDSIZE == 64 || defined __SSE2_MATH__)
+#  if defined __USE_MISC || defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99
+__BEGIN_NAMESPACE_C99
+
+/* Round to an integral value; imm8 $4 selects the MXCSR rounding mode.  */
+__MATH_INLINE double
+__NTH (rint (double __x))
+{
+  double __res;
+  __asm ("roundsd $4, %1, %0" : "=x" (__res) : "x" (__x));
+  return __res;
+}
+__MATH_INLINE float
+__NTH (rintf (float __x))  /* Single-precision variant (roundss).  */
+{
+  float __res;
+  __asm ("roundss $4, %1, %0" : "=x" (__res) : "x" (__x));
+  return __res;
+}
+
+__END_NAMESPACE_C99
+#  endif
+
+__BEGIN_NAMESPACE_STD
+/* Smallest integral value not less than X (roundsd $2: toward +Inf).  */
+__MATH_INLINE double
+__NTH (ceil (double __x))
+{
+  double __res;
+  __asm ("roundsd $2, %1, %0" : "=x" (__res) : "x" (__x));
+  return __res;
+}
+__END_NAMESPACE_STD
+
+__BEGIN_NAMESPACE_C99
+__MATH_INLINE float
+__NTH (ceilf (float __x))  /* Smallest integral value not less than X.  */
+{
+  float __res;
+  __asm ("roundss $2, %1, %0" : "=x" (__res) : "x" (__x));
+  return __res;
+}
+__END_NAMESPACE_C99
+
+__BEGIN_NAMESPACE_STD
+/* Largest integral value not greater than X (roundsd $1: toward -Inf).  */
+__MATH_INLINE double
+__NTH (floor (double __x))
+{
+  double __res;
+  __asm ("roundsd $1, %1, %0" : "=x" (__res) : "x" (__x));
+  return __res;
+}
+__END_NAMESPACE_STD
+
+__BEGIN_NAMESPACE_C99
+__MATH_INLINE float
+__NTH (floorf (float __x))  /* Largest integral value not greater than X.  */
+{
+  float __res;
+  __asm ("roundss $1, %1, %0" : "=x" (__res) : "x" (__x));
+  return __res;
+}
+__END_NAMESPACE_C99
+# endif
+
 #endif
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
new file mode 100644
index 0000000..b29feed
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -0,0 +1,4 @@
+ifeq ($(subdir),math)
+libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
+			s_rint-c s_rintf-c
+endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-c.c b/sysdeps/x86_64/fpu/multiarch/s_ceil-c.c
new file mode 100644
index 0000000..6a5ea3f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-c.c
@@ -0,0 +1,2 @@
+#define __ceil __ceil_c	/* Generic C version becomes __ceil_c; its aliases are skipped.  */
+#include <sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.S b/sysdeps/x86_64/fpu/multiarch/s_ceil.S
new file mode 100644
index 0000000..d0f8da3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.S
@@ -0,0 +1,40 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <machine/asm.h>
+#include <init-arch.h>
+
+
+ENTRY(__ceil)
+	.type	__ceil, @gnu_indirect_function	/* Resolver: returns the implementation address in %rax.  */
+	call	__get_cpu_features@plt
+	movq	%rax, %rdx	/* Keep the __get_cpu_features result in %rdx.  */
+	leaq	__ceil_sse41(%rip), %rax	/* Tentatively select the SSE4.1 version.  */
+	testl	$bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+	jnz	2f	/* SSE4.1 bit set: keep __ceil_sse41.  */
+	leaq	__ceil_c(%rip), %rax	/* Otherwise fall back to __ceil_c.  */
+2:	ret
+END(__ceil)
+weak_alias (__ceil, ceil)
+
+
+ENTRY(__ceil_sse41)
+	roundsd	$2, %xmm0, %xmm0	/* imm8 2: round toward +Inf, in place in %xmm0.  */
+	ret
+END(__ceil_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-c.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf-c.c
new file mode 100644
index 0000000..229a627
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-c.c
@@ -0,0 +1,2 @@
+#define __ceilf __ceilf_c	/* Generic C version becomes __ceilf_c; its alias is skipped.  */
+#include <sysdeps/ieee754/flt-32/s_ceilf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf.S
new file mode 100644
index 0000000..65ce252
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.S
@@ -0,0 +1,40 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <machine/asm.h>
+#include <init-arch.h>
+
+
+ENTRY(__ceilf)
+	.type	__ceilf, @gnu_indirect_function	/* Resolver: returns the implementation address in %rax.  */
+	call	__get_cpu_features@plt
+	movq	%rax, %rdx	/* Keep the __get_cpu_features result in %rdx.  */
+	leaq	__ceilf_sse41(%rip), %rax	/* Tentatively select the SSE4.1 version.  */
+	testl	$bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+	jnz	2f	/* SSE4.1 bit set: keep __ceilf_sse41.  */
+	leaq	__ceilf_c(%rip), %rax	/* Otherwise fall back to __ceilf_c.  */
+2:	ret
+END(__ceilf)
+weak_alias (__ceilf, ceilf)
+
+
+ENTRY(__ceilf_sse41)
+	roundss	$2, %xmm0, %xmm0	/* imm8 2: round toward +Inf, in place in %xmm0.  */
+	ret
+END(__ceilf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-c.c b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c
new file mode 100644
index 0000000..8b8c31d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c
@@ -0,0 +1,2 @@
+#define __floor __floor_c	/* Generic C version becomes __floor_c; its aliases are skipped.  */
+#include <sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.S b/sysdeps/x86_64/fpu/multiarch/s_floor.S
new file mode 100644
index 0000000..514ea95
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor.S
@@ -0,0 +1,40 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <machine/asm.h>
+#include <init-arch.h>
+
+
+ENTRY(__floor)
+	.type	__floor, @gnu_indirect_function	/* Resolver: returns the implementation address in %rax.  */
+	call	__get_cpu_features@plt
+	movq	%rax, %rdx	/* Keep the __get_cpu_features result in %rdx.  */
+	leaq	__floor_sse41(%rip), %rax	/* Tentatively select the SSE4.1 version.  */
+	testl	$bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+	jnz	2f	/* SSE4.1 bit set: keep __floor_sse41.  */
+	leaq	__floor_c(%rip), %rax	/* Otherwise fall back to __floor_c.  */
+2:	ret
+END(__floor)
+weak_alias (__floor, floor)
+
+
+ENTRY(__floor_sse41)
+	roundsd	$1, %xmm0, %xmm0	/* imm8 1: round toward -Inf, in place in %xmm0.  */
+	ret
+END(__floor_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c b/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c
new file mode 100644
index 0000000..3f36786
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c
@@ -0,0 +1,2 @@
+#define __floorf __floorf_c	/* Generic C version becomes __floorf_c; its alias is skipped.  */
+#include <sysdeps/ieee754/flt-32/s_floorf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.S b/sysdeps/x86_64/fpu/multiarch/s_floorf.S
new file mode 100644
index 0000000..d8cd56e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.S
@@ -0,0 +1,40 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <machine/asm.h>
+#include <init-arch.h>
+
+
+ENTRY(__floorf)
+	.type	__floorf, @gnu_indirect_function	/* Resolver: returns the implementation address in %rax.  */
+	call	__get_cpu_features@plt
+	movq	%rax, %rdx	/* Keep the __get_cpu_features result in %rdx.  */
+	leaq	__floorf_sse41(%rip), %rax	/* Tentatively select the SSE4.1 version.  */
+	testl	$bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+	jnz	2f	/* SSE4.1 bit set: keep __floorf_sse41.  */
+	leaq	__floorf_c(%rip), %rax	/* Otherwise fall back to __floorf_c.  */
+2:	ret
+END(__floorf)
+weak_alias (__floorf, floorf)
+
+
+ENTRY(__floorf_sse41)
+	roundss	$1, %xmm0, %xmm0	/* imm8 1: round toward -Inf, in place in %xmm0.  */
+	ret
+END(__floorf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-c.c b/sysdeps/x86_64/fpu/multiarch/s_rint-c.c
new file mode 100644
index 0000000..f29f45b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint-c.c
@@ -0,0 +1,2 @@
+#define __rint __rint_c	/* Generic C version becomes __rint_c; its aliases are skipped.  */
+#include <sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.S b/sysdeps/x86_64/fpu/multiarch/s_rint.S
new file mode 100644
index 0000000..75beffa
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint.S
@@ -0,0 +1,40 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <machine/asm.h>
+#include <init-arch.h>
+
+
+ENTRY(__rint)
+	.type	__rint, @gnu_indirect_function	/* Resolver: returns the implementation address in %rax.  */
+	call	__get_cpu_features@plt
+	movq	%rax, %rdx	/* Keep the __get_cpu_features result in %rdx.  */
+	leaq	__rint_sse41(%rip), %rax	/* Tentatively select the SSE4.1 version.  */
+	testl	$bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+	jnz	2f	/* SSE4.1 bit set: keep __rint_sse41.  */
+	leaq	__rint_c(%rip), %rax	/* Otherwise fall back to __rint_c.  */
+2:	ret
+END(__rint)
+weak_alias (__rint, rint)
+
+
+ENTRY(__rint_sse41)
+	roundsd	$4, %xmm0, %xmm0	/* imm8 4: round using the current MXCSR rounding mode.  */
+	ret
+END(__rint_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c b/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c
new file mode 100644
index 0000000..30ed42a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c
@@ -0,0 +1,2 @@
+#define __rintf __rintf_c	/* Generic C version becomes __rintf_c; its alias is skipped.  */
+#include <sysdeps/ieee754/flt-32/s_rintf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.S b/sysdeps/x86_64/fpu/multiarch/s_rintf.S
new file mode 100644
index 0000000..512d28c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.S
@@ -0,0 +1,40 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <machine/asm.h>
+#include <init-arch.h>
+
+
+ENTRY(__rintf)
+	.type	__rintf, @gnu_indirect_function	/* Resolver: returns the implementation address in %rax.  */
+	call	__get_cpu_features@plt
+	movq	%rax, %rdx	/* Keep the __get_cpu_features result in %rdx.  */
+	leaq	__rintf_sse41(%rip), %rax	/* Tentatively select the SSE4.1 version.  */
+	testl	$bit_SSE4_1, CPUID_OFFSET+index_SSE4_1(%rdx)
+	jnz	2f	/* SSE4.1 bit set: keep __rintf_sse41.  */
+	leaq	__rintf_c(%rip), %rax	/* Otherwise fall back to __rintf_c.  */
+2:	ret
+END(__rintf)
+weak_alias (__rintf, rintf)
+
+
+ENTRY(__rintf_sse41)
+	roundss	$4, %xmm0, %xmm0	/* imm8 4: round using the current MXCSR rounding mode.  */
+	ret
+END(__rintf_sse41)
-- 
2.7.4