Provide internal optimizations on x86-64 with SSE4.1

author Ulrich Drepper <drepper@gmail.com>

Mon, 17 Oct 2011 15:23:40 +0000 (11:23 -0400)

committer Ulrich Drepper <drepper@gmail.com>

Mon, 17 Oct 2011 15:23:40 +0000 (11:23 -0400)
author Ulrich Drepper <drepper@gmail.com>
Mon, 17 Oct 2011 15:23:40 +0000 (11:23 -0400)
committer Ulrich Drepper <drepper@gmail.com>
Mon, 17 Oct 2011 15:23:40 +0000 (11:23 -0400)
diff --git a/ChangeLog b/ChangeLog

index 289475c..af72e63 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
  2011-10-17  Ulrich Drepper  <drepper@gmail.com>
  
+       * sysdeps/x86_64/fpu/bits/mathinline.h: Don't define inlines if
+       __NO_MATH_INLINES is defined.  Cleanups.
+
+       * sysdeps/x86_64/fpu/math_private.h: Define __rint, __rintf, __floor,
+       and __floorf if target has SSE4.1.
+       * sysdeps/x86_64/fpu/multiarch/s_floor-c.c: Undef first.
+       * sysdeps/x86_64/fpu/multiarch/s_floorf-c.c: Likewise.
+       * sysdeps/x86_64/fpu/multiarch/s_rint-c.c: Likewise.
+       * sysdeps/x86_64/fpu/multiarch/s_rintf-c.c: Likewise.
+
         * sysdeps/x86_64/fpu/bits/mathinline.h (floor): Use correct function
         name.
         (floorf): Likewise.
diff --git a/sysdeps/x86_64/fpu/bits/mathinline.h b/sysdeps/x86_64/fpu/bits/mathinline.h

index 210bef8..721f6e4 100644 (file)
--- a/sysdeps/x86_64/fpu/bits/mathinline.h
+++ b/sysdeps/x86_64/fpu/bits/mathinline.h
@@ -30,34 +30,35 @@
  #endif
  
  
-#if defined __GNUC__ && __GNUC__ >= 2
-# ifdef __USE_ISOC99
+/* GCC version 2.7 and earlier has problems with all this inlining
+   code, so disable it for those versions of the compiler.  */
+#if __GNUC_PREREQ (2, 8) && defined __USE_ISOC99
  __BEGIN_NAMESPACE_C99
  
  /* Test for negative number.  Used in the signbit() macro.  */
  __MATH_INLINE int
  __NTH (__signbitf (float __x))
  {
-#  if __WORDSIZE == 32
+# if __WORDSIZE == 32
    __extension__ union { float __f; int __i; } __u = { __f: __x };
    return __u.__i < 0;
-#  else
+# else
    int __m;
    __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x));
    return __m & 0x8;
-#  endif
+# endif
  }
  __MATH_INLINE int
  __NTH (__signbit (double __x))
  {
-#  if __WORDSIZE == 32
+# if __WORDSIZE == 32
    __extension__ union { double __d; int __i[2]; } __u = { __d: __x };
    return __u.__i[1] < 0;
-#  else
+# else
    int __m;
    __asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x));
    return __m & 0x80;
-#  endif
+# endif
  }
  __MATH_INLINE int
  __NTH (__signbitl (long double __x))
@@ -66,6 +67,16 @@ __NTH (__signbitl (long double __x))
    return (__u.__i[2] & 0x8000) != 0;
  }
  
+__END_NAMESPACE_C99
+#endif
+
+
+#if (__GNUC_PREREQ (2, 8) && !defined __NO_MATH_INLINES \
+     && defined __OPTIMIZE__)
+
+# ifdef __USE_ISOC99
+__BEGIN_NAMESPACE_C99
+
  /* Round to nearest integer.  */
  #  if __WORDSIZE == 64 || defined __SSE_MATH__
  __MATH_INLINE long int
@@ -100,14 +111,10 @@ __NTH (llrint (double __x))
    __asm ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x));
    return __res;
  }
-
-__END_NAMESPACE_C99
  #  endif
  
  #  if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \
        && (__WORDSIZE == 64 || defined __SSE2_MATH__)
-__BEGIN_NAMESPACE_C99
-
  /* Determine maximum of two values.  */
  __MATH_INLINE float
  __NTH (fmaxf (float __x, float __y))
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h

index 523ec54..71eb416 100644 (file)
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -90,3 +90,31 @@ do {                                                         \
    ({ long double __res;                                                              \
       asm ("fsqrt" : "=t" (__res) : "0" ((long double) d));                   \
       __res; })
+
+#ifdef __SSE4_1__
+# ifndef __rint
+#  define __rint(d) \
+  ({ double __res; \
+     asm ("roundsd $4, %1, %0" : "=x" (__res) : "x" ((double) d));           \
+     __res; })
+# endif
+# ifndef __rintf
+#  define __rintf(d) \
+  ({ float __res; \
+     asm ("roundss $4, %1, %0" : "=x" (__res) : "x" ((float) d));            \
+     __res; })
+# endif
+
+# ifndef __floor
+#  define __floor(d) \
+  ({ double __res; \
+     asm ("roundsd $1, %1, %0" : "=x" (__res) : "x" ((double) d));           \
+     __res; })
+# endif
+# ifndef __floorf
+#  define __floorf(d) \
+  ({ float __res; \
+     asm ("roundss $1, %1, %0" : "=x" (__res) : "x" ((float) d));            \
+     __res; })
+# endif
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-c.c b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c

index 8b8c31d..68733b6 100644 (file)
--- a/sysdeps/x86_64/fpu/multiarch/s_floor-c.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor-c.c
@@ -1,2 +1,3 @@
+#undef __floor
  #define __floor __floor_c
  #include <sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c b/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c

index 3f36786..2386362 100644 (file)
--- a/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-c.c
@@ -1,2 +1,3 @@
+#undef __floorf
  #define __floorf __floorf_c
  #include <sysdeps/ieee754/flt-32/s_floorf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-c.c b/sysdeps/x86_64/fpu/multiarch/s_rint-c.c

index f29f45b..162a630 100644 (file)
--- a/sysdeps/x86_64/fpu/multiarch/s_rint-c.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint-c.c
@@ -1,2 +1,3 @@
+#undef __rint
  #define __rint __rint_c
  #include <sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c b/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c

index 30ed42a..8505249 100644 (file)
--- a/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-c.c
@@ -1,2 +1,3 @@
+#undef __rintf
  #define __rintf __rintf_c
  #include <sysdeps/ieee754/flt-32/s_rintf.c>
author	Ulrich Drepper <drepper@gmail.com>
	Mon, 17 Oct 2011 15:23:40 +0000 (11:23 -0400)
committer	Ulrich Drepper <drepper@gmail.com>
	Mon, 17 Oct 2011 15:23:40 +0000 (11:23 -0400)
ChangeLog		patch \| blob \| history
sysdeps/x86_64/fpu/bits/mathinline.h		patch \| blob \| history
sysdeps/x86_64/fpu/math_private.h		patch \| blob \| history
sysdeps/x86_64/fpu/multiarch/s_floor-c.c		patch \| blob \| history
sysdeps/x86_64/fpu/multiarch/s_floorf-c.c		patch \| blob \| history
sysdeps/x86_64/fpu/multiarch/s_rint-c.c		patch \| blob \| history
sysdeps/x86_64/fpu/multiarch/s_rintf-c.c		patch \| blob \| history