PowerPC: Arithmetic function optimizations for POWER

author Adhemerval Zanella <azanella@linux.vnet.ibm.com>

Fri, 11 Nov 2011 18:33:38 +0000 (13:33 -0500)

committer Ulrich Drepper <drepper@gmail.com>

Fri, 11 Nov 2011 18:33:38 +0000 (13:33 -0500)
author Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Fri, 11 Nov 2011 18:33:38 +0000 (13:33 -0500)
committer Ulrich Drepper <drepper@gmail.com>
Fri, 11 Nov 2011 18:33:38 +0000 (13:33 -0500)
diff --git a/ChangeLog b/ChangeLog

index cfb381a..0fe09d0 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2011-11-07  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
+
+       * sysdeps/powerpc/fpu/math_private.h: Use inline assembly versions
+       of math functions ceil, trunc, floor, round, copysign, and sqrt,
+       when available on the platform.
+       * sysdeps/powerpc/fpu/e_sqrt.c: Undefine __ieee754_sqrt to avoid
+       name clash.
+       * sysdeps/powerpc/fpu/e_sqrtf.c: Likewise.
+       * sysdeps/powerpc/powerpc64/fpu/e_sqrt.c: Likewise.
+       * sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c: Likewise.
+
  2011-10-30  Marek Polacek  <mpolacek@redhat.com>
  
         * libio/wfileops.c (_IO_wfile_underflow_mmap): Remove unused variable.
diff --git a/sysdeps/powerpc/fpu/e_sqrt.c b/sysdeps/powerpc/fpu/e_sqrt.c

index f9ded25..d59bd08 100644 (file)
--- a/sysdeps/powerpc/fpu/e_sqrt.c
+++ b/sysdeps/powerpc/fpu/e_sqrt.c
@@ -154,6 +154,7 @@ __slow_ieee754_sqrt (double x)
    return f_wash (x);
  }
  
+#undef __ieee754_sqrt
  double
  __ieee754_sqrt (double x)
  {
diff --git a/sysdeps/powerpc/fpu/e_sqrtf.c b/sysdeps/powerpc/fpu/e_sqrtf.c

index 965faee..9c6b860 100644 (file)
--- a/sysdeps/powerpc/fpu/e_sqrtf.c
+++ b/sysdeps/powerpc/fpu/e_sqrtf.c
@@ -130,7 +130,7 @@ __slow_ieee754_sqrtf (float x)
    return f_washf (x);
  }
  
-
+#undef __ieee754_sqrtf
  float
  __ieee754_sqrtf (float x)
  {
diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h

index 90021c6..c4dd217 100644 (file)
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -1,5 +1,5 @@
  /* Private inline math functions for powerpc.
-   Copyright (C) 2006
+   Copyright (C) 2006, 2011
     Free Software Foundation, Inc.
     This file is part of the GNU C Library.
  
@@ -25,12 +25,145 @@
  #include <ldsodefs.h>
  #include <dl-procinfo.h>
  
+#include <math/math_private.h>
+
  # if __WORDSIZE == 64 || defined _ARCH_PWR4
  #  define __CPU_HAS_FSQRT 1
+
+#ifndef __ieee754_sqrt
+# define __ieee754_sqrt(x)             \
+  ({ double __z;                       \
+     __asm __volatile (                        \
+       "       fsqrt %0,%1\n"          \
+               : "=f" (__z)            \
+               : "f"(x));              \
+     __z; })
+#endif
+#ifndef __ieee754_sqrtf
+# define __ieee754_sqrtf(x)            \
+  ({ float __z;                                \
+     __asm __volatile (                        \
+       "       fsqrts %0,%1\n"         \
+               : "=f" (__z)            \
+               : "f"(x));              \
+     __z; })
+#endif
+
  # else
  #  define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
+# endif        /* __WORDSIZE == 64 || defined _ARCH_PWR4 */
+
+
+#if defined _ARCH_PWR5X
+
+# ifndef __round
+#  define __round(x)                   \
+    ({ double __z;                     \
+      __asm __volatile (               \
+       "       frin %0,%1\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+# ifndef __roundf
+#  define __roundf(x)                  \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       frin %0,%1\n"           \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+
+# ifndef __trunc
+#  define __trunc(x)                   \
+    ({ double __z;                     \
+     __asm __volatile (                        \
+       "       friz %0,%1\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+# ifndef __truncf
+#  define __truncf(x)                  \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       friz %0,%1\n"           \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+
+# ifndef __ceil
+#  define __ceil(x)                    \
+    ({ double __z;                     \
+     __asm __volatile (                        \
+       "       frip %0,%1\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+# ifndef __ceilf
+#  define __ceilf(x)                   \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       frip %0,%1\n"           \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
  # endif
  
+# ifndef __floor
+#  define __floor(x)                   \
+    ({ double __z;                     \
+     __asm __volatile (                        \
+       "       frim %0,%1\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+# ifndef __floorf
+#  define __floorf(x)                  \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       frim %0,%1\n"           \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+
+#endif /* defined _ARCH_PWR5X */
+
+
+#if defined _ARCH_PWR6
+
+# ifndef __copysign
+#  define __copysign(x, y)             \
+    ({ double __z;                     \
+     __asm __volatile (                        \
+       "       fcpsgn %0,%1,%2\n"      \
+               : "=f" (__z)            \
+               : "f" (y), "f" (x));    \
+     __z; })
+# endif
+# ifndef __copysignf
+#  define __copysignf(x, y)            \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       fcpsgn %0,%1,%2\n"      \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (y), "f" (x));    \
+     __z; })
+# endif
+
+#endif /* defined _ARCH_PWR6 */
+
+
  # ifndef __LIBC_INTERNAL_MATH_INLINES
  extern double __slow_ieee754_sqrt (double);
  __inline double
@@ -78,6 +211,4 @@ __ieee754_sqrtf (float __x)
  }
  #endif /* __LIBC_INTERNAL_MATH_INLINES */
  
-#include <math/math_private.h>
-
  #endif /* _PPC_MATH_PRIVATE_H_ */
diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c

index 314abba..66d04ce 100644 (file)
--- a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
+++ b/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
@@ -20,6 +20,7 @@
  #include <math.h>
  #include <math_private.h>
  
+#undef __ieee754_sqrt
  double
  __ieee754_sqrt (double x)
  {
diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c

index 7157214..847a2e4 100644 (file)
--- a/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c
+++ b/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c
@@ -20,6 +20,7 @@
  #include <math.h>
  #include <math_private.h>
  
+#undef __ieee754_sqrtf
  float
  __ieee754_sqrtf (float x)
  {
author	Adhemerval Zanella <azanella@linux.vnet.ibm.com>
	Fri, 11 Nov 2011 18:33:38 +0000 (13:33 -0500)
committer	Ulrich Drepper <drepper@gmail.com>
	Fri, 11 Nov 2011 18:33:38 +0000 (13:33 -0500)
ChangeLog		patch \| blob \| history
sysdeps/powerpc/fpu/e_sqrt.c		patch \| blob \| history
sysdeps/powerpc/fpu/e_sqrtf.c		patch \| blob \| history
sysdeps/powerpc/fpu/math_private.h		patch \| blob \| history
sysdeps/powerpc/powerpc64/fpu/e_sqrt.c		patch \| blob \| history
sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c		patch \| blob \| history