PowerPC: Arithmetic function optimizations for POWER
author Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Fri, 11 Nov 2011 18:33:38 +0000 (13:33 -0500)
committer Ulrich Drepper <drepper@gmail.com>
Fri, 11 Nov 2011 18:33:38 +0000 (13:33 -0500)
ChangeLog
sysdeps/powerpc/fpu/e_sqrt.c
sysdeps/powerpc/fpu/e_sqrtf.c
sysdeps/powerpc/fpu/math_private.h
sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c

index cfb381a..0fe09d0 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2011-11-07  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
+
+       * sysdeps/powerpc/fpu/math_private.h: Using inline assembly version
+       of math functions ceil, trunc, floor, round, and sqrt, when
+       available on the platform.
+       * sysdeps/powerpc/fpu/e_sqrt.c: Undefine __ieee754_sqrt to avoid
+       name clash.
+       * sysdeps/powerpc/fpu/e_sqrtf.c: Likewise.
+       * sysdeps/powerpc/powerpc64/fpu/e_sqrt.c: Likewise.
+       * sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c: Likewise.
+
 2011-10-30  Marek Polacek  <mpolacek@redhat.com>
 
        * libio/wfileops.c (_IO_wfile_underflow_mmap): Remove unused variable.
index f9ded25..d59bd08 100644 (file)
@@ -154,6 +154,7 @@ __slow_ieee754_sqrt (double x)
   return f_wash (x);
 }
 
+#undef __ieee754_sqrt
 double
 __ieee754_sqrt (double x)
 {
index 965faee..9c6b860 100644 (file)
@@ -130,7 +130,7 @@ __slow_ieee754_sqrtf (float x)
   return f_washf (x);
 }
 
-
+#undef __ieee754_sqrtf
 float
 __ieee754_sqrtf (float x)
 {
index 90021c6..c4dd217 100644 (file)
@@ -1,5 +1,5 @@
 /* Private inline math functions for powerpc.
-   Copyright (C) 2006
+   Copyright (C) 2006, 2011
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
 #include <ldsodefs.h>
 #include <dl-procinfo.h>
 
+#include <math/math_private.h>
+
 # if __WORDSIZE == 64 || defined _ARCH_PWR4
 #  define __CPU_HAS_FSQRT 1
+/* __CPU_HAS_FSQRT is 1 here, so sqrt/sqrtf expand to fsqrt/fsqrts inline.  */
+#ifndef __ieee754_sqrt
+# define __ieee754_sqrt(x)             \
+  ({ double __z;                       \
+     __asm __volatile (                        \
+       "       fsqrt %0,%1\n"          \
+               : "=f" (__z)            \
+               : "f"(x));              \
+     __z; })
+#endif /* !__ieee754_sqrt */
+#ifndef __ieee754_sqrtf
+# define __ieee754_sqrtf(x)            \
+  ({ float __z;                                \
+     __asm __volatile (                        \
+       "       fsqrts %0,%1\n"         \
+               : "=f" (__z)            \
+               : "f"(x));              \
+     __z; })
+#endif /* !__ieee754_sqrtf */
+
 # else
 #  define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
+# endif /* __WORDSIZE == 64 || defined _ARCH_PWR4 */
+
+
+#if defined _ARCH_PWR5X
+/* frin rounds to the nearest integer; the float variant narrows with frsp.  */
+# ifndef __round
+#  define __round(x)                   \
+    ({ double __z;                     \
+      __asm __volatile (               \
+       "       frin %0,%1\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+# ifndef __roundf
+#  define __roundf(x)                  \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       frin %0,%1\n"           \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+/* friz rounds toward zero; the float variant narrows with frsp.  */
+# ifndef __trunc
+#  define __trunc(x)                   \
+    ({ double __z;                     \
+     __asm __volatile (                        \
+       "       friz %0,%1\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+# ifndef __truncf
+#  define __truncf(x)                  \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       friz %0,%1\n"           \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+/* frip rounds toward +infinity; the float variant narrows with frsp.  */
+# ifndef __ceil
+#  define __ceil(x)                    \
+    ({ double __z;                     \
+     __asm __volatile (                        \
+       "       frip %0,%1\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+# ifndef __ceilf
+#  define __ceilf(x)                   \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       frip %0,%1\n"           \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
 # endif
 
+# ifndef __floor /* frim rounds toward -infinity */
+#  define __floor(x)                   \
+    ({ double __z;                     \
+     __asm __volatile (                        \
+       "       frim %0,%1\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+# ifndef __floorf /* as __floor, then narrowed with frsp */
+#  define __floorf(x)                  \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       frim %0,%1\n"           \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+
+#endif /* defined _ARCH_PWR5X */
+
+
+#if defined _ARCH_PWR6
+/* fcpsgn takes the sign from its first source, so y is passed before x.  */
+# ifndef __copysign
+#  define __copysign(x, y)             \
+    ({ double __z;                     \
+     __asm __volatile (                        \
+       "       fcpsgn %0,%1,%2\n"      \
+               : "=f" (__z)            \
+               : "f" (y), "f" (x));    \
+     __z; })
+# endif
+# ifndef __copysignf /* as __copysign, then narrowed with frsp */
+#  define __copysignf(x, y)            \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       fcpsgn %0,%1,%2\n"      \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (y), "f" (x));    \
+     __z; })
+# endif
+
+#endif /* defined _ARCH_PWR6 */
+
+
 # ifndef __LIBC_INTERNAL_MATH_INLINES
 extern double __slow_ieee754_sqrt (double);
 __inline double
@@ -78,6 +211,4 @@ __ieee754_sqrtf (float __x)
 }
 #endif /* __LIBC_INTERNAL_MATH_INLINES */
 
-#include <math/math_private.h>
-
 #endif /* _PPC_MATH_PRIVATE_H_ */
index 314abba..66d04ce 100644 (file)
@@ -20,6 +20,7 @@
 #include <math.h>
 #include <math_private.h>
 
+#undef __ieee754_sqrt
 double
 __ieee754_sqrt (double x)
 {
index 7157214..847a2e4 100644 (file)
@@ -20,6 +20,7 @@
 #include <math.h>
 #include <math_private.h>
 
+#undef __ieee754_sqrtf
 float
 __ieee754_sqrtf (float x)
 {