From af968f62f24c5c0ef4e7e5ab41acae946908c112 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@gmail.com>
Date: Mon, 24 Oct 2011 20:19:17 -0400
Subject: [PATCH] Optimize accurate 64-bit routines for FMA4 on x86-64

---
 ChangeLog                                          |  57 ++++++
 config.make.in                                     |   1 +
 configure                                          |   2 +
 configure.in                                       |   1 +
 math/Makefile                                      |   2 +-
 sysdeps/ieee754/dbl-64/dosincos.c                  |  23 ++-
 sysdeps/ieee754/dbl-64/e_asin.c                    |   4 +
 sysdeps/ieee754/dbl-64/e_atan2.c                   |  13 +-
 sysdeps/ieee754/dbl-64/e_exp.c                     |   2 +
 sysdeps/ieee754/dbl-64/e_log.c                     |   2 +
 sysdeps/ieee754/dbl-64/e_pow.c                     |   2 +
 sysdeps/ieee754/dbl-64/mpa.c                       |  58 +++---
 sysdeps/ieee754/dbl-64/mpa.h                       |  27 ++-
 sysdeps/ieee754/dbl-64/mpsqrt.c                    |  15 +-
 sysdeps/ieee754/dbl-64/s_atan.c                    |  16 +-
 sysdeps/ieee754/dbl-64/s_sin.c                     | 210 +++++++++++----------
 sysdeps/ieee754/dbl-64/{sincos.tbl => sincostab.c} |   9 +-
 sysdeps/x86_64/fpu/multiarch/Makefile              |  32 ++++
 sysdeps/x86_64/fpu/multiarch/brandred-fma4.c       |   3 +
 sysdeps/x86_64/fpu/multiarch/doasin-fma4.c         |   3 +
 sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c       |   5 +
 sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c         |  10 +
 sysdeps/x86_64/fpu/multiarch/e_asin.c              |  23 +++
 sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c        |   9 +
 sysdeps/x86_64/fpu/multiarch/e_atan2.c             |  16 ++
 sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c          |   5 +
 sysdeps/x86_64/fpu/multiarch/e_exp.c               |  15 ++
 sysdeps/x86_64/fpu/multiarch/e_log-fma4.c          |   7 +
 sysdeps/x86_64/fpu/multiarch/e_log.c               |  15 ++
 sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c          |   5 +
 sysdeps/x86_64/fpu/multiarch/e_pow.c               |  15 ++
 sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c        |   3 +
 sysdeps/x86_64/fpu/multiarch/mpa-fma4.c            |  10 +
 sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c         |   8 +
 sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c        |   8 +
 sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c          |   7 +
 sysdeps/x86_64/fpu/multiarch/mplog-fma4.c          |   7 +
 sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c         |   6 +
 sysdeps/x86_64/fpu/multiarch/mptan-fma4.c          |   6 +
 sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c         |   8 +
 sysdeps/x86_64/fpu/multiarch/s_atan.c              |  14 ++
 sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c          |  11 ++
 sysdeps/x86_64/fpu/multiarch/s_sin.c               |  22 +++
 sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c          |   9 +
 sysdeps/x86_64/fpu/multiarch/s_tan.c               |  14 ++
 sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c       |  14 ++
 sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c        |   8 +
 sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c        |  10 +
 48 files changed, 595 insertions(+), 177 deletions(-)
 rename sysdeps/ieee754/dbl-64/{sincos.tbl => sincostab.c} (99%)
 create mode 100644 sysdeps/x86_64/fpu/multiarch/brandred-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/doasin-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/e_asin.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/e_atan2.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/e_exp.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/e_log.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/e_pow.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/mplog-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/mptan-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_atan.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_sin.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_tan.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c

diff --git a/ChangeLog b/ChangeLog
index d4052a6..a2e155a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,62 @@
 2011-10-24  Ulrich Drepper  <drepper@gmail.com>
 
+	* config.make.in: Add have-mfma4 entry.
+	* configure.in: Substitute libc_cv_cc_fma4.
+	* math/Makefile (dbl-only-routines): Add sincostab.
+	* sysdeps/ieee754/dbl-64/dosincos.c: Don't include sincos.tbl.
+	Use __sincostab not sincos.
+	* sysdeps/ieee754/dbl-64/e_asin.c: Don't define aliases when function
+	name is a macro.
+	* sysdeps/ieee754/dbl-64/e_exp.c: Likewise.
+	* sysdeps/ieee754/dbl-64/e_log.c: Likewise.
+	* sysdeps/ieee754/dbl-64/e_pow.c: Likewise.
+	* sysdeps/ieee754/dbl-64/e_atan2.c: Likewise.  Define singArctan2
+	using __copysign.
+	* sysdeps/ieee754/dbl-64/mpa.c: Don't export __acr.  Don't define
+	__cr and __cpymn.  Define __cpy unless NO___CPY is defined.  Define
+	norm, denorm, and __mp_dbl unless NO___MP_DBL is defined.
+	* sysdeps/ieee754/dbl-64/mpa.h: Don't declare __acr, __cr, __cpymn,
+	and __inv.
+	* sysdeps/ieee754/dbl-64/mpsqrt.c: Make fastiroot static.
+	* sysdeps/ieee754/dbl-64/s_atan.c: Define __signArctan using
+	__copysign.
+	* sysdeps/ieee754/dbl-64/s_sin.c: Use __sincostab not sincos.  Don't
+	define aliases when function name is a macro.
+	* sysdeps/ieee754/dbl-64/sincostab.c: Renamed from
+	sysdeps/ieee754/dbl-64/sincos.tbl.
+	* sysdeps/x86_64/fpu/multiarch/Makefile: Add entries to build
+	fma4-enabled routines.
+	* sysdeps/x86_64/fpu/multiarch/brandred-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/doasin-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/e_asin.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/e_atan2.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/e_exp.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/e_log-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/e_log.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/e_pow.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/mpa-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/mplog-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/mptan-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_atan.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_sin.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/s_tan.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c: New file.
+
 	* sysdeps/ieee754/dbl-64/doasin.c: Adjust for DLA_FMA -> DLA_FMS
 	rename.
 	* sysdeps/ieee754/dbl-64/dosincos.c: Likewise.
diff --git a/config.make.in b/config.make.in
index 2181d05..d2baf6d 100644
--- a/config.make.in
+++ b/config.make.in
@@ -59,6 +59,7 @@ have-cpp-asm-debuginfo = @libc_cv_cpp_asm_debuginfo@
 enable-check-abi = @enable_check_abi@
 have-forced-unwind = @libc_cv_forced_unwind@
 have-fpie = @libc_cv_fpie@
+have-mfma4 = @libc_cv_cc_fma4@
 gnu89-inline-CFLAGS = @gnu89_inline@
 have-ssp = @libc_cv_ssp@
 have-selinux = @have_selinux@
diff --git a/configure b/configure
index ec1d651..55cb008 100755
--- a/configure
+++ b/configure
@@ -623,6 +623,7 @@ elf
 ldd_rewrite_script
 use_ldconfig
 libc_cv_as_i686
+libc_cv_cc_fma4
 libc_cv_cc_novzeroupper
 libc_cv_cc_avx
 libc_cv_cc_sse4
@@ -7944,6 +7945,7 @@ fi
 
 
 
+
 if test $elf = yes; then
   $as_echo "#define HAVE_ELF 1" >>confdefs.h
 
diff --git a/configure.in b/configure.in
index 6977fe1..9678cbe 100644
--- a/configure.in
+++ b/configure.in
@@ -2339,6 +2339,7 @@ AC_SUBST(libc_cv_cpp_asm_debuginfo)
 AC_SUBST(libc_cv_cc_sse4)
 AC_SUBST(libc_cv_cc_avx)
 AC_SUBST(libc_cv_cc_novzeroupper)
+AC_SUBST(libc_cv_cc_fma4)
 AC_SUBST(libc_cv_as_i686)
 
 AC_SUBST(use_ldconfig)
diff --git a/math/Makefile b/math/Makefile
index 431eb5a..41340da 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -66,7 +66,7 @@ include ../Makeconfig
 
 dbl-only-routines := branred doasin dosincos halfulp mpa mpatan2	\
 		     mpatan mpexp mplog mpsqrt mptan sincos32 slowexp	\
-		     slowpow
+		     slowpow sincostab
 libm-routines = $(strip $(libm-support) $(libm-calls) \
 			$(patsubst %_rf,%f_r,$(libm-calls:=f))	\
 			$(long-m-$(long-double-fcts))) \
diff --git a/sysdeps/ieee754/dbl-64/dosincos.c b/sysdeps/ieee754/dbl-64/dosincos.c
index d5c6a14..712d585 100644
--- a/sysdeps/ieee754/dbl-64/dosincos.c
+++ b/sysdeps/ieee754/dbl-64/dosincos.c
@@ -35,11 +35,16 @@
 
 #include "endian.h"
 #include "mydefs.h"
-#include "sincos.tbl"
 #include <dla.h>
 #include "dosincos.h"
 #include "math_private.h"
 
+extern const union
+{
+  int4 i[880];
+  double x[440];
+} __sincostab attribute_hidden;
+
 /***********************************************************************/
 /* Routine receive Double-Length number (x+dx) and computing sin(x+dx) */
 /* as Double-Length number and store it at array v .It computes it by  */
@@ -66,10 +71,10 @@ void __dubsin(double x, double dx, double v[]) {
   dd=(x-d)+dx;
 	 /* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
   MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
-  sn=sincos.x[k];     /*                                  */
-  ssn=sincos.x[k+1];  /*      sin(Xi) and cos(Xi)         */
-  cs=sincos.x[k+2];   /*                                  */
-  ccs=sincos.x[k+3];  /*                                  */
+  sn=__sincostab.x[k];     /*                                  */
+  ssn=__sincostab.x[k+1];  /*      sin(Xi) and cos(Xi)         */
+  cs=__sincostab.x[k+2];   /*                                  */
+  ccs=__sincostab.x[k+3];  /*                                  */
   MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc);  /* Taylor    */
   ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
   MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);      /* series    */
@@ -118,10 +123,10 @@ void __dubcos(double x, double dx, double v[]) {
   d=x+dx;
   dd=(x-d)+dx;  /* cos(x+dx)=cos(Xi+t)=cos(Xi)cos(t) - sin(Xi)sin(t) */
   MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
-  sn=sincos.x[k];     /*                                  */
-  ssn=sincos.x[k+1];  /*      sin(Xi) and cos(Xi)         */
-  cs=sincos.x[k+2];   /*                                  */
-  ccs=sincos.x[k+3];  /*                                  */
+  sn=__sincostab.x[k];     /*                                  */
+  ssn=__sincostab.x[k+1];  /*      sin(Xi) and cos(Xi)         */
+  cs=__sincostab.x[k+2];   /*                                  */
+  ccs=__sincostab.x[k+3];  /*                                  */
   MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc);
   ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
   MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
diff --git a/sysdeps/ieee754/dbl-64/e_asin.c b/sysdeps/ieee754/dbl-64/e_asin.c
index 02efb7a..cd4cc2e 100644
--- a/sysdeps/ieee754/dbl-64/e_asin.c
+++ b/sysdeps/ieee754/dbl-64/e_asin.c
@@ -324,7 +324,9 @@ double __ieee754_asin(double x){
     return u.x/v.x;  /* NaN */
  }
 }
+#ifndef __ieee754_asin
 strong_alias (__ieee754_asin, __asin_finite)
+#endif
 
 /*******************************************************************/
 /*                                                                 */
@@ -636,4 +638,6 @@ double __ieee754_acos(double x)
     return u.x/v.x;
   }
 }
+#ifndef __ieee754_acos
 strong_alias (__ieee754_acos, __acos_finite)
+#endif
diff --git a/sysdeps/ieee754/dbl-64/e_atan2.c b/sysdeps/ieee754/dbl-64/e_atan2.c
index 264791e..9caaccc 100644
--- a/sysdeps/ieee754/dbl-64/e_atan2.c
+++ b/sysdeps/ieee754/dbl-64/e_atan2.c
@@ -51,7 +51,11 @@
 /* round to nearest mode of IEEE 754 standard.                          */
 /************************************************************************/
 static double atan2Mp(double ,double ,const int[]);
-static double signArctan2(double ,double);
+  /* Fix the sign and return after stage 1 or stage 2 */
+static double signArctan2(double y,double z)
+{
+  return __copysign(z, y);
+}
 static double normalized(double ,double,double ,double);
 void __mpatan2(mp_no *,mp_no *,mp_no *,int);
 
@@ -375,7 +379,9 @@ double __ieee754_atan2(double y,double x) {
     }
   }
 }
+#ifndef __ieee754_atan2
 strong_alias (__ieee754_atan2, __atan2_finite)
+#endif
 
   /* Treat the Denormalized case */
 static double  normalized(double ax,double ay,double y, double z)
@@ -387,11 +393,6 @@ static double  normalized(double ax,double ay,double y, double z)
   __sub(&mpz,&mperr,&mpz2,p);  __mp_dbl(&mpz2,&z,p);
   return signArctan2(y,z);
 }
-  /* Fix the sign and return after stage 1 or stage 2 */
-static double signArctan2(double y,double z)
-{
-  return ((y<ZERO) ? -z : z);
-}
   /* Stage 3: Perform a multi-Precision computation */
 static double  atan2Mp(double x,double y,const int pr[])
 {
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index f4b34a6..48bbb05 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -145,7 +145,9 @@ double __ieee754_exp(double x) {
     else return __slowexp(x);
   }
 }
+#ifndef __ieee754_exp
 strong_alias (__ieee754_exp, __exp_finite)
+#endif
 
 /************************************************************************/
 /* Compute e^(x+xx)(Double-Length number) .The routine also receive     */
diff --git a/sysdeps/ieee754/dbl-64/e_log.c b/sysdeps/ieee754/dbl-64/e_log.c
index b7df81b..7a0a26f 100644
--- a/sysdeps/ieee754/dbl-64/e_log.c
+++ b/sysdeps/ieee754/dbl-64/e_log.c
@@ -207,4 +207,6 @@ double __ieee754_log(double x) {
   }
   return y1;
 }
+#ifndef __ieee754_log
 strong_alias (__ieee754_log, __log_finite)
+#endif
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c
index 0c7abb6..94b1ab8 100644
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -153,7 +153,9 @@ double __ieee754_pow(double x, double y) {
   if (y<0) return (x<1.0)?INF.x:0;
   return 0;     /* unreachable, to make the compiler happy */
 }
+#ifndef __ieee754_pow
 strong_alias (__ieee754_pow, __pow_finite)
+#endif
 
 /**************************************************************************/
 /* Computing x^y using more accurate but more slow log routine            */
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 68647ba..ad5a639 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -1,8 +1,7 @@
-
 /*
  * IBM Accurate Mathematical Library
  * written by International Business Machines Corp.
- * Copyright (C) 2001 Free Software Foundation
+ * Copyright (C) 2001, 2011 Free Software Foundation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -64,7 +63,7 @@ static int mcr(const mp_no *x, const mp_no *y, int p) {
 
 
 /* acr() compares the absolute values of two multiple precision numbers */
-int __acr(const mp_no *x, const mp_no *y, int p) {
+static int __acr(const mp_no *x, const mp_no *y, int p) {
   int i;
 
   if      (X[0] == ZERO) {
@@ -82,8 +81,9 @@ int __acr(const mp_no *x, const mp_no *y, int p) {
 }
 
 
+#if 0
 /* cr90 compares the values of two multiple precision numbers           */
-int  __cr(const mp_no *x, const mp_no *y, int p) {
+static int  __cr(const mp_no *x, const mp_no *y, int p) {
   int i;
 
   if      (X[0] > Y[0])  i= 1;
@@ -93,26 +93,26 @@ int  __cr(const mp_no *x, const mp_no *y, int p) {
 
   return i;
 }
+#endif
 
 
+#ifndef NO___CPY
 /* Copy a multiple precision number. Set *y=*x. x=y is permissible.      */
 void __cpy(const mp_no *x, mp_no *y, int p) {
-  int i;
-
   EY = EX;
-  for (i=0; i <= p; i++)    Y[i] = X[i];
-
-  return;
+  for (int i=0; i <= p; i++)    Y[i] = X[i];
 }
+#endif
 
 
+#if 0
 /* Copy a multiple precision number x of precision m into a */
 /* multiple precision number y of precision n. In case n>m, */
 /* the digits of y beyond the m'th are set to zero. In case */
 /* n<m, the digits of x beyond the n'th are ignored.        */
 /* x=y is permissible.                                      */
 
-void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
+static void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
 
   int i,k;
 
@@ -122,7 +122,10 @@ void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
 
   return;
 }
+#endif
+
 
+#ifndef NO___MP_DBL
 /* Convert a multiple precision number *x into a double precision */
 /* number *y, normalized case  (|x| >= 2**(-1022))) */
 static void norm(const mp_no *x, double *y, int p)
@@ -141,7 +144,7 @@ static void norm(const mp_no *x, double *y, int p)
   }
   else {
     for (a=ONE, z[1]=X[1]; z[1] < TWO23; )
-        {a *= TWO;   z[1] *= TWO; }
+	{a *= TWO;   z[1] *= TWO; }
 
     for (i=2; i<5; i++) {
       z[i] = X[i]*a;
@@ -157,10 +160,10 @@ static void norm(const mp_no *x, double *y, int p)
 
     if (v == TWO18) {
       if (z[4] == ZERO) {
-        for (i=5; i <= p; i++) {
-          if (X[i] == ZERO)   continue;
-          else                {z[3] += ONE;   break; }
-        }
+	for (i=5; i <= p; i++) {
+	  if (X[i] == ZERO)   continue;
+	  else                {z[3] += ONE;   break; }
+	}
       }
       else              z[3] += ONE;
     }
@@ -242,6 +245,7 @@ void __mp_dbl(const mp_no *x, double *y, int p) {
   else if (EX==-42 && X[1]>=TWO10)  norm(x,y,p);
   else                              denorm(x,y,p);
 }
+#endif
 
 
 /* dbl_mp() converts a double precision number x into a multiple precision  */
@@ -336,11 +340,11 @@ static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
     else {
       i=p;   j=p+1-j;   k=p;
       if (Y[j] > ZERO) {
-        Z[k+1] = RADIX - Y[j--];
-        Z[k]   = MONE; }
+	Z[k+1] = RADIX - Y[j--];
+	Z[k]   = MONE; }
       else {
-        Z[k+1] = ZERO;
-        Z[k]   = ZERO;   j--;}
+	Z[k+1] = ZERO;
+	Z[k]   = ZERO;   j--;}
     }
   }
 
@@ -431,11 +435,11 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
   int i, i1, i2, j, k, k2;
   double u;
 
-                      /* Is z=0? */
+		      /* Is z=0? */
   if (X[0]*Y[0]==ZERO)
      { Z[0]=ZERO;  return; }
 
-                       /* Multiply, add and carry */
+		       /* Multiply, add and carry */
   k2 = (p<3) ? p+p : p+3;
   Z[k2]=ZERO;
   for (k=k2; k>1; ) {
@@ -449,7 +453,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
     Z[--k] = u*RADIXI;
   }
 
-                 /* Is there a carry beyond the most significant digit? */
+		 /* Is there a carry beyond the most significant digit? */
   if (Z[1] == ZERO) {
     for (i=1; i<=p; i++)  Z[i]=Z[i+1];
     EZ = EX + EY - 1; }
@@ -466,7 +470,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
 /* 2.001*r**(1-p) for p>3.                                                  */
 /* *x=0 is not permissible. *x is left unchanged.                           */
 
-void __inv(const mp_no *x, mp_no *y, int p) {
+static void __inv(const mp_no *x, mp_no *y, int p) {
   int i;
 #if 0
   int l;
@@ -474,11 +478,11 @@ void __inv(const mp_no *x, mp_no *y, int p) {
   double t;
   mp_no z,w;
   static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
-                            4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
+			    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
   const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
-                         0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
-                         0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
-                         0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
+			 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+			 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+			 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
 
   __cpy(x,&z,p);  z.e=0;  __mp_dbl(&z,&t,p);
   t=ONE/t;   __dbl_mp(t,y,p);    EY -= EX;
diff --git a/sysdeps/ieee754/dbl-64/mpa.h b/sysdeps/ieee754/dbl-64/mpa.h
index 4aec48e..3ca0ca5 100644
--- a/sysdeps/ieee754/dbl-64/mpa.h
+++ b/sysdeps/ieee754/dbl-64/mpa.h
@@ -1,8 +1,7 @@
-
 /*
  * IBM Accurate Mathematical Library
  * Written by International Business Machines Corp.
- * Copyright (C) 2001 Free Software Foundation, Inc.
+ * Copyright (C) 2001, 2011 Free Software Foundation, Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -45,14 +44,14 @@ typedef struct {/* This structure holds the details of a multi-precision     */
   int e;        /* floating point number, x: d[0] holds its sign (-1,0 or 1) */
   double d[40]; /* e holds its exponent (...,-2,-1,0,1,2,...) and            */
 } mp_no;        /* d[1]...d[p] hold its mantissa digits. The value of x is,  */
-                /* x = d[1]*r**(e-1) + d[2]*r**(e-2) + ... + d[p]*r**(e-p).  */
-                /* Here   r = 2**24,   0 <= d[i] < r  and  1 <= p <= 32.     */
-                /* p is a global variable. A multi-precision number is       */
-                /* always normalized. Namely, d[1] > 0. An exception is      */
-                /* a zero which is characterized by d[0] = 0. The terms      */
-                /* d[p+1], d[p+2], ... of a none zero number have no         */
-                /* significance and so are the terms e, d[1],d[2],...        */
-                /* of a zero.                                                */
+		/* x = d[1]*r**(e-1) + d[2]*r**(e-2) + ... + d[p]*r**(e-p).  */
+		/* Here   r = 2**24,   0 <= d[i] < r  and  1 <= p <= 32.     */
+		/* p is a global variable. A multi-precision number is       */
+		/* always normalized. Namely, d[1] > 0. An exception is      */
+		/* a zero which is characterized by d[0] = 0. The terms      */
+		/* d[p+1], d[p+2], ... of a none zero number have no         */
+		/* significance and so are the terms e, d[1],d[2],...        */
+		/* of a zero.                                                */
 
 typedef union { int i[2]; double d; } number;
 
@@ -65,16 +64,16 @@ typedef union { int i[2]; double d; } number;
 
 #define ABS(x)   ((x) <  0  ? -(x) : (x))
 
-int __acr(const mp_no *, const mp_no *, int);
-int  __cr(const mp_no *, const mp_no *, int);
+// int __acr(const mp_no *, const mp_no *, int);
+// int  __cr(const mp_no *, const mp_no *, int);
 void __cpy(const mp_no *, mp_no *, int);
-void __cpymn(const mp_no *, int, mp_no *, int);
+// void __cpymn(const mp_no *, int, mp_no *, int);
 void __mp_dbl(const mp_no *, double *, int);
 void __dbl_mp(double, mp_no *, int);
 void __add(const mp_no *, const mp_no *, mp_no *, int);
 void __sub(const mp_no *, const mp_no *, mp_no *, int);
 void __mul(const mp_no *, const mp_no *, mp_no *, int);
-void __inv(const mp_no *, mp_no *, int);
+// void __inv(const mp_no *, mp_no *, int);
 void __dvd(const mp_no *, const mp_no *, mp_no *, int);
 
 extern void __mpatan (mp_no *, mp_no *, int);
diff --git a/sysdeps/ieee754/dbl-64/mpsqrt.c b/sysdeps/ieee754/dbl-64/mpsqrt.c
index 9945de3..bea6232 100644
--- a/sysdeps/ieee754/dbl-64/mpsqrt.c
+++ b/sysdeps/ieee754/dbl-64/mpsqrt.c
@@ -1,8 +1,7 @@
-
 /*
  * IBM Accurate Mathematical Library
  * written by International Business Machines Corp.
- * Copyright (C) 2001 Free Software Foundation
+ * Copyright (C) 2001, 2011 Free Software Foundation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -41,7 +40,7 @@
 /* p as integer. Routine computes sqrt(*x) and stores result in *y          */
 /****************************************************************************/
 
-double fastiroot(double);
+static double fastiroot(double);
 
 void __mpsqrt(mp_no *x, mp_no *y, int p) {
 #include "mpsqrt.h"
@@ -50,11 +49,11 @@ void __mpsqrt(mp_no *x, mp_no *y, int p) {
   double dx,dy;
   mp_no
     mphalf   = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
-                   0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
-                   0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}},
+		   0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+		   0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}},
     mp3halfs = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
-                   0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
-                   0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
+		   0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
+		   0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
   mp_no mpxn,mpz,mpu,mpt1,mpt2;
 
   /* Prepare multi-precision 1/2 and 3/2 */
@@ -82,7 +81,7 @@ void __mpsqrt(mp_no *x, mp_no *y, int p) {
 /* Compute a double precision approximation for 1/sqrt(x)  */
 /* with the relative error bounded by 2**-51.              */
 /***********************************************************/
-double fastiroot(double x) {
+static double fastiroot(double x) {
   union {int i[2]; double d;} p,q;
   double y,z, t;
   int n;
diff --git a/sysdeps/ieee754/dbl-64/s_atan.c b/sysdeps/ieee754/dbl-64/s_atan.c
index c7f5f3e..65369ff 100644
--- a/sysdeps/ieee754/dbl-64/s_atan.c
+++ b/sysdeps/ieee754/dbl-64/s_atan.c
@@ -46,7 +46,13 @@
 
 void __mpatan(mp_no *,mp_no *,int);          /* see definition in mpatan.c */
 static double atanMp(double,const int[]);
-double __signArctan(double,double);
+
+  /* Fix the sign of y and return */
+static double  __signArctan(double x,double y){
+  return __copysign(y, x);
+}
+
+
 /* An ultimate atan() routine. Given an IEEE double machine number x,    */
 /* routine computes the correctly rounded (to nearest) value of atan(x). */
 double atan(double x) {
@@ -203,14 +209,6 @@ double atan(double x) {
 
 }
 
-
-  /* Fix the sign of y and return */
-double  __signArctan(double x,double y){
-
-    if (x<ZERO) return -y;
-    else        return  y;
-}
-
  /* Final stages. Compute atan(x) by multiple precision arithmetic */
 static double atanMp(double x,const int pr[]){
   mp_no mpx,mpy,mpy2,mperr,mpt1,mpy1;
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index b40776f..02d428c 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -1,7 +1,7 @@
 /*
  * IBM Accurate Mathematical Library
  * written by International Business Machines Corp.
- * Copyright (C) 2001, 2009 Free Software Foundation
+ * Copyright (C) 2001, 2009, 2011 Free Software Foundation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -53,15 +53,20 @@
 #include "mydefs.h"
 #include "usncs.h"
 #include "MathLib.h"
-#include "sincos.tbl"
 #include "math_private.h"
 
+extern const union
+{
+  int4 i[880];
+  double x[440];
+} __sincostab attribute_hidden;
+
 static const double
-          sn3 = -1.66666666666664880952546298448555E-01,
-          sn5 =  8.33333214285722277379541354343671E-03,
-          cs2 =  4.99999999999999999999950396842453E-01,
-          cs4 = -4.16666666666664434524222570944589E-02,
-          cs6 =  1.38888874007937613028114285595617E-03;
+	  sn3 = -1.66666666666664880952546298448555E-01,
+	  sn5 =  8.33333214285722277379541354343671E-03,
+	  cs2 =  4.99999999999999999999950396842453E-01,
+	  cs4 = -4.16666666666664434524222570944589E-02,
+	  cs6 =  1.38888874007937613028114285595617E-03;
 
 void __dubsin(double x, double dx, double w[]);
 void __docos(double x, double dx, double w[]);
@@ -120,10 +125,10 @@ double __sin(double x){
 	  s = y + y*xx*(sn3 +xx*sn5);
 	  c = xx*(cs2 +xx*(cs4 + xx*cs6));
 	  k=u.i[LOW_HALF]<<2;
-	  sn=(m>0)?sincos.x[k]:-sincos.x[k];
-	  ssn=(m>0)?sincos.x[k+1]:-sincos.x[k+1];
-	  cs=sincos.x[k+2];
-	  ccs=sincos.x[k+3];
+	  sn=(m>0)?__sincostab.x[k]:-__sincostab.x[k];
+	  ssn=(m>0)?__sincostab.x[k+1]:-__sincostab.x[k+1];
+	  cs=__sincostab.x[k+2];
+	  ccs=__sincostab.x[k+3];
 	  cor=(ssn+s*ccs-sn*c)+cs*s;
 	  res=sn+cor;
 	  cor=(sn-res)+cor;
@@ -146,10 +151,10 @@ double __sin(double x){
 	  s = y + y*xx*(sn3 +xx*sn5);
 	  c = xx*(cs2 +xx*(cs4 + xx*cs6));
 	  k=u.i[LOW_HALF]<<2;
-	  sn=sincos.x[k];
-	  ssn=sincos.x[k+1];
-	  cs=sincos.x[k+2];
-	  ccs=sincos.x[k+3];
+	  sn=__sincostab.x[k];
+	  ssn=__sincostab.x[k+1];
+	  cs=__sincostab.x[k+2];
+	  ccs=__sincostab.x[k+3];
 	  cor=(ccs-s*ssn-cs*c)-sn*s;
 	  res=cs+cor;
 	  cor=(cs-res)+cor;
@@ -174,7 +179,7 @@ double __sin(double x){
 	    xx = a*a;
 	    if (n) {a=-a;da=-da;}
 	    if (xx < 0.01588) {
-                      /*Taylor series */
+		      /*Taylor series */
 	      t = (((((s5.x*xx + s4.x)*xx + s3.x)*xx + s2.x)*xx + s1.x)*a - 0.5*da)*xx+da;
 	      res = a+t;
 	      cor = (a-res)+t;
@@ -192,10 +197,10 @@ double __sin(double x){
 	      s = y + (db+y*xx*(sn3 +xx*sn5));
 	      c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
 	      k=u.i[LOW_HALF]<<2;
-	      sn=sincos.x[k];
-	      ssn=sincos.x[k+1];
-	      cs=sincos.x[k+2];
-	      ccs=sincos.x[k+3];
+	      sn=__sincostab.x[k];
+	      ssn=__sincostab.x[k+1];
+	      cs=__sincostab.x[k+2];
+	      ccs=__sincostab.x[k+3];
 	      cor=(ssn+s*ccs-sn*c)+cs*s;
 	      res=sn+cor;
 	      cor=(sn-res)+cor;
@@ -212,10 +217,10 @@ double __sin(double x){
 	    y=a-(u.x-big.x)+da;
 	    xx=y*y;
 	    k=u.i[LOW_HALF]<<2;
-	    sn=sincos.x[k];
-	    ssn=sincos.x[k+1];
-	    cs=sincos.x[k+2];
-	    ccs=sincos.x[k+3];
+	    sn=__sincostab.x[k];
+	    ssn=__sincostab.x[k+1];
+	    cs=__sincostab.x[k+2];
+	    ccs=__sincostab.x[k+3];
 	    s = y + y*xx*(sn3 +xx*sn5);
 	    c = xx*(cs2 +xx*(cs4 + xx*cs6));
 	    cor=(ccs-s*ssn-cs*c)-sn*s;
@@ -253,7 +258,7 @@ double __sin(double x){
 	    xx = a*a;
 	    if (n) {a=-a;da=-da;}
 	    if (xx < 0.01588) {
-              /* Taylor series */
+	      /* Taylor series */
 	      t = (((((s5.x*xx + s4.x)*xx + s3.x)*xx + s2.x)*xx + s1.x)*a - 0.5*da)*xx+da;
 	      res = a+t;
 	      cor = (a-res)+t;
@@ -269,10 +274,10 @@ double __sin(double x){
 	      s = y + (db+y*xx*(sn3 +xx*sn5));
 	      c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
 	      k=u.i[LOW_HALF]<<2;
-	      sn=sincos.x[k];
-	      ssn=sincos.x[k+1];
-	      cs=sincos.x[k+2];
-	      ccs=sincos.x[k+3];
+	      sn=__sincostab.x[k];
+	      ssn=__sincostab.x[k+1];
+	      cs=__sincostab.x[k+2];
+	      ccs=__sincostab.x[k+3];
 	      cor=(ssn+s*ccs-sn*c)+cs*s;
 	      res=sn+cor;
 	      cor=(sn-res)+cor;
@@ -289,10 +294,10 @@ double __sin(double x){
 	    y=a-(u.x-big.x)+da;
 	    xx=y*y;
 	    k=u.i[LOW_HALF]<<2;
-	    sn=sincos.x[k];
-	    ssn=sincos.x[k+1];
-	    cs=sincos.x[k+2];
-	    ccs=sincos.x[k+3];
+	    sn=__sincostab.x[k];
+	    ssn=__sincostab.x[k+1];
+	    cs=__sincostab.x[k+2];
+	    ccs=__sincostab.x[k+3];
 	    s = y + y*xx*(sn3 +xx*sn5);
 	    c = xx*(cs2 +xx*(cs4 + xx*cs6));
 	    cor=(ccs-s*ssn-cs*c)-sn*s;
@@ -364,10 +369,10 @@ double __cos(double x)
     s = y + y*xx*(sn3 +xx*sn5);
     c = xx*(cs2 +xx*(cs4 + xx*cs6));
     k=u.i[LOW_HALF]<<2;
-    sn=sincos.x[k];
-    ssn=sincos.x[k+1];
-    cs=sincos.x[k+2];
-    ccs=sincos.x[k+3];
+    sn=__sincostab.x[k];
+    ssn=__sincostab.x[k+1];
+    cs=__sincostab.x[k+2];
+    ccs=__sincostab.x[k+3];
     cor=(ccs-s*ssn-cs*c)-sn*s;
     res=cs+cor;
     cor=(cs-res)+cor;
@@ -396,10 +401,10 @@ double __cos(double x)
       s = y + (db+y*xx*(sn3 +xx*sn5));
       c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
       k=u.i[LOW_HALF]<<2;
-      sn=sincos.x[k];
-      ssn=sincos.x[k+1];
-      cs=sincos.x[k+2];
-      ccs=sincos.x[k+3];
+      sn=__sincostab.x[k];
+      ssn=__sincostab.x[k+1];
+      cs=__sincostab.x[k+2];
+      ccs=__sincostab.x[k+3];
       cor=(ssn+s*ccs-sn*c)+cs*s;
       res=sn+cor;
       cor=(sn-res)+cor;
@@ -442,10 +447,10 @@ double __cos(double x)
 	s = y + (db+y*xx*(sn3 +xx*sn5));
 	c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
 	k=u.i[LOW_HALF]<<2;
-	sn=sincos.x[k];
-	ssn=sincos.x[k+1];
-	cs=sincos.x[k+2];
-	ccs=sincos.x[k+3];
+	sn=__sincostab.x[k];
+	ssn=__sincostab.x[k+1];
+	cs=__sincostab.x[k+2];
+	ccs=__sincostab.x[k+3];
 	cor=(ssn+s*ccs-sn*c)+cs*s;
 	res=sn+cor;
 	cor=(sn-res)+cor;
@@ -461,10 +466,10 @@ double __cos(double x)
       y=a-(u.x-big.x)+da;
       xx=y*y;
       k=u.i[LOW_HALF]<<2;
-      sn=sincos.x[k];
-      ssn=sincos.x[k+1];
-      cs=sincos.x[k+2];
-      ccs=sincos.x[k+3];
+      sn=__sincostab.x[k];
+      ssn=__sincostab.x[k+1];
+      cs=__sincostab.x[k+2];
+      ccs=__sincostab.x[k+3];
       s = y + y*xx*(sn3 +xx*sn5);
       c = xx*(cs2 +xx*(cs4 + xx*cs6));
       cor=(ccs-s*ssn-cs*c)-sn*s;
@@ -473,7 +478,7 @@ double __cos(double x)
       cor = (cor>0)? 1.025*cor+eps : 1.025*cor-eps;
       return (res==res+cor)? ((n)?-res:res) : csloww2(a,da,x,n);
 
-           break;
+	   break;
 
     }
 
@@ -517,10 +522,10 @@ double __cos(double x)
 	s = y + (db+y*xx*(sn3 +xx*sn5));
 	c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
 	k=u.i[LOW_HALF]<<2;
-	sn=sincos.x[k];
-	ssn=sincos.x[k+1];
-	cs=sincos.x[k+2];
-	ccs=sincos.x[k+3];
+	sn=__sincostab.x[k];
+	ssn=__sincostab.x[k+1];
+	cs=__sincostab.x[k+2];
+	ccs=__sincostab.x[k+3];
 	cor=(ssn+s*ccs-sn*c)+cs*s;
 	res=sn+cor;
 	cor=(sn-res)+cor;
@@ -536,10 +541,10 @@ double __cos(double x)
       y=a-(u.x-big.x)+da;
       xx=y*y;
       k=u.i[LOW_HALF]<<2;
-      sn=sincos.x[k];
-      ssn=sincos.x[k+1];
-      cs=sincos.x[k+2];
-      ccs=sincos.x[k+3];
+      sn=__sincostab.x[k];
+      ssn=__sincostab.x[k+1];
+      cs=__sincostab.x[k+2];
+      ccs=__sincostab.x[k+3];
       s = y + y*xx*(sn3 +xx*sn5);
       c = xx*(cs2 +xx*(cs4 + xx*cs6));
       cor=(ccs-s*ssn-cs*c)-sn*s;
@@ -611,7 +616,7 @@ static const double th2_36 = 206158430208.0;   /*    1.5*2**37   */
  }
 }
 /*******************************************************************************/
-/* Routine compute sin(x) for   0.25<|x|< 0.855469 by  sincos.tbl   and Taylor */
+/* Routine compute sin(x) for   0.25<|x|< 0.855469 by  __sincostab.tbl   and Taylor */
 /* and if result still doesn't accurate enough by mpsin   or dubsin            */
 /*******************************************************************************/
 
@@ -627,10 +632,10 @@ static double slow1(double x) {
   s = y*xx*(sn3 +xx*sn5);
   c = xx*(cs2 +xx*(cs4 + xx*cs6));
   k=u.i[LOW_HALF]<<2;
-  sn=sincos.x[k];          /* Data          */
-  ssn=sincos.x[k+1];       /*  from         */
-  cs=sincos.x[k+2];        /*   tables      */
-  ccs=sincos.x[k+3];       /*    sincos.tbl */
+  sn=__sincostab.x[k];          /* Data          */
+  ssn=__sincostab.x[k+1];       /*  from         */
+  cs=__sincostab.x[k+2];        /*   tables      */
+  ccs=__sincostab.x[k+3];       /*    __sincostab.tbl */
   y1 = (y+t22)-t22;
   y2 = y - y1;
   c1 = (cs+t22)-t22;
@@ -648,7 +653,7 @@ static double slow1(double x) {
   }
 }
 /**************************************************************************/
-/*  Routine compute sin(x) for   0.855469  <|x|<2.426265  by  sincos.tbl  */
+/*  Routine compute sin(x) for   0.855469  <|x|<2.426265  by  __sincostab.tbl  */
 /* and if result still doesn't accurate enough by mpsin   or dubsin       */
 /**************************************************************************/
 static double slow2(double x) {
@@ -672,10 +677,10 @@ static double slow2(double x) {
   s = y*xx*(sn3 +xx*sn5);
   c = y*del+xx*(cs2 +xx*(cs4 + xx*cs6));
   k=u.i[LOW_HALF]<<2;
-  sn=sincos.x[k];
-  ssn=sincos.x[k+1];
-  cs=sincos.x[k+2];
-  ccs=sincos.x[k+3];
+  sn=__sincostab.x[k];
+  ssn=__sincostab.x[k+1];
+  cs=__sincostab.x[k+2];
+  ccs=__sincostab.x[k+3];
   y1 = (y+t22)-t22;
   y2 = (y - y1)+del;
   e1 = (sn+t22)-t22;
@@ -763,10 +768,10 @@ static double sloww1(double x, double dx, double orig) {
   s = y*xx*(sn3 +xx*sn5);
   c = xx*(cs2 +xx*(cs4 + xx*cs6));
   k=u.i[LOW_HALF]<<2;
-  sn=sincos.x[k];
-  ssn=sincos.x[k+1];
-  cs=sincos.x[k+2];
-  ccs=sincos.x[k+3];
+  sn=__sincostab.x[k];
+  ssn=__sincostab.x[k+1];
+  cs=__sincostab.x[k+2];
+  ccs=__sincostab.x[k+3];
   y1 = (y+t22)-t22;
   y2 = (y - y1)+dx;
   c1 = (cs+t22)-t22;
@@ -805,10 +810,10 @@ static double sloww2(double x, double dx, double orig, int n) {
   s = y*xx*(sn3 +xx*sn5);
   c = y*dx+xx*(cs2 +xx*(cs4 + xx*cs6));
   k=u.i[LOW_HALF]<<2;
-  sn=sincos.x[k];
-  ssn=sincos.x[k+1];
-  cs=sincos.x[k+2];
-  ccs=sincos.x[k+3];
+  sn=__sincostab.x[k];
+  ssn=__sincostab.x[k+1];
+  cs=__sincostab.x[k+2];
+  ccs=__sincostab.x[k+3];
 
   y1 = (y+t22)-t22;
   y2 = (y - y1)+dx;
@@ -882,10 +887,10 @@ mynumber u;
  s = y*xx*(sn3 +xx*sn5);
  c = xx*(cs2 +xx*(cs4 + xx*cs6));
  k=u.i[LOW_HALF]<<2;
- sn=sincos.x[k];
- ssn=sincos.x[k+1];
- cs=sincos.x[k+2];
- ccs=sincos.x[k+3];
+ sn=__sincostab.x[k];
+ ssn=__sincostab.x[k+1];
+ cs=__sincostab.x[k+2];
+ ccs=__sincostab.x[k+3];
  y1 = (y+t22)-t22;
  y2 = (y - y1)+dx;
  c1 = (cs+t22)-t22;
@@ -925,10 +930,10 @@ mynumber u;
  s = y*xx*(sn3 +xx*sn5);
  c = y*dx+xx*(cs2 +xx*(cs4 + xx*cs6));
  k=u.i[LOW_HALF]<<2;
- sn=sincos.x[k];
- ssn=sincos.x[k+1];
- cs=sincos.x[k+2];
- ccs=sincos.x[k+3];
+ sn=__sincostab.x[k];
+ ssn=__sincostab.x[k+1];
+ cs=__sincostab.x[k+2];
+ ccs=__sincostab.x[k+3];
 
  y1 = (y+t22)-t22;
  y2 = (y - y1)+dx;
@@ -966,10 +971,10 @@ static double cslow2(double x) {
   s = y*xx*(sn3 +xx*sn5);
   c = xx*(cs2 +xx*(cs4 + xx*cs6));
   k=u.i[LOW_HALF]<<2;
-  sn=sincos.x[k];
-  ssn=sincos.x[k+1];
-  cs=sincos.x[k+2];
-  ccs=sincos.x[k+3];
+  sn=__sincostab.x[k];
+  ssn=__sincostab.x[k+1];
+  cs=__sincostab.x[k+2];
+  ccs=__sincostab.x[k+3];
   y1 = (y+t22)-t22;
   y2 = y - y1;
   e1 = (sn+t22)-t22;
@@ -1059,10 +1064,10 @@ static double csloww1(double x, double dx, double orig) {
   s = y*xx*(sn3 +xx*sn5);
   c = xx*(cs2 +xx*(cs4 + xx*cs6));
   k=u.i[LOW_HALF]<<2;
-  sn=sincos.x[k];
-  ssn=sincos.x[k+1];
-  cs=sincos.x[k+2];
-  ccs=sincos.x[k+3];
+  sn=__sincostab.x[k];
+  ssn=__sincostab.x[k+1];
+  cs=__sincostab.x[k+2];
+  ccs=__sincostab.x[k+3];
   y1 = (y+t22)-t22;
   y2 = (y - y1)+dx;
   c1 = (cs+t22)-t22;
@@ -1103,10 +1108,10 @@ static double csloww2(double x, double dx, double orig, int n) {
   s = y*xx*(sn3 +xx*sn5);
   c = y*dx+xx*(cs2 +xx*(cs4 + xx*cs6));
   k=u.i[LOW_HALF]<<2;
-  sn=sincos.x[k];
-  ssn=sincos.x[k+1];
-  cs=sincos.x[k+2];
-  ccs=sincos.x[k+3];
+  sn=__sincostab.x[k];
+  ssn=__sincostab.x[k+1];
+  cs=__sincostab.x[k+2];
+  ccs=__sincostab.x[k+3];
 
   y1 = (y+t22)-t22;
   y2 = (y - y1)+dx;
@@ -1127,12 +1132,17 @@ static double csloww2(double x, double dx, double orig, int n) {
   }
 }
 
+#ifndef __cos
 weak_alias (__cos, cos)
+# ifdef NO_LONG_DOUBLE
+strong_alias (__cos, __cosl)
+weak_alias (__cos, cosl)
+# endif
+#endif
+#ifndef __sin
 weak_alias (__sin, sin)
-
-#ifdef NO_LONG_DOUBLE
+# ifdef NO_LONG_DOUBLE
 strong_alias (__sin, __sinl)
 weak_alias (__sin, sinl)
-strong_alias (__cos, __cosl)
-weak_alias (__cos, cosl)
+# endif
 #endif
diff --git a/sysdeps/ieee754/dbl-64/sincos.tbl b/sysdeps/ieee754/dbl-64/sincostab.c
similarity index 99%
rename from sysdeps/ieee754/dbl-64/sincos.tbl
rename to sysdeps/ieee754/dbl-64/sincostab.c
index 9343f24..49fccac 100644
--- a/sysdeps/ieee754/dbl-64/sincos.tbl
+++ b/sysdeps/ieee754/dbl-64/sincostab.c
@@ -1,7 +1,7 @@
 /*
  * IBM Accurate Mathematical Library
  * Written by International Business Machines Corp.
- * Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+ * Copyright (C) 2001, 2007, 2011 Free Software Foundation, Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -18,13 +18,16 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
+#include <mydefs.h>
+#include <endian.h>
+
 /****************************************************************/
 /* TABLES FOR THE usin() and ucos()   FUNCTION                  */
 /****************************************************************/
 
 
 #ifdef BIG_ENDI
-static const union {int4 i[880]; double x[440];}sincos = { .i = {
+const union {int4 i[880]; double x[440];}__sincostab = { .i = {
 /**/                   0x00000000, 0x00000000,
 /**/                   0x00000000, 0x00000000,
 /**/                   0x3FF00000, 0x00000000,
@@ -467,7 +470,7 @@ static const union {int4 i[880]; double x[440];}sincos = { .i = {
 /**/                   0x3C747A10, 0x8073C259 } };
 #else
 #ifdef LITTLE_ENDI
-static const union {int4 i[880]; double x[440];} sincos = { .i = {
+const union {int4 i[880]; double x[440];} __sincostab = { .i = {
 /**/                   0x00000000, 0x00000000,
 /**/                   0x00000000, 0x00000000,
 /**/                   0x00000000, 0x3FF00000,
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index bd07e98..70cb740 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -1,4 +1,36 @@
 ifeq ($(subdir),math)
 libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
 			s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c
+
+ifeq ($(have-mfma4),yes)
+libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \
+			e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \
+			mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \
+			sincos32-fma4 doasin-fma4 dosincos-fma4 \
+			brandred-fma4 halfulp-fma4 mpexp-fma4 \
+			mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4
+
+CFLAGS-brandred-fma4.c = -mfma4
+CFLAGS-doasin-fma4.c = -mfma4
+CFLAGS-dosincos-fma4.c = -mfma4
+CFLAGS-e_asin-fma4.c = -mfma4
+CFLAGS-e_atan2-fma4.c = -mfma4
+CFLAGS-e_exp-fma4.c = -mfma4
+CFLAGS-e_log-fma4.c = -mfma4
+CFLAGS-e_pow-fma4.c = -mfma4
+CFLAGS-halfulp-fma4.c = -mfma4
+CFLAGS-mpa-fma4.c = -mfma4
+CFLAGS-mpatan-fma4.c = -mfma4
+CFLAGS-mpatan2-fma4.c = -mfma4
+CFLAGS-mpexp-fma4.c = -mfma4
+CFLAGS-mplog-fma4.c = -mfma4
+CFLAGS-mpsqrt-fma4.c = -mfma4
+CFLAGS-mptan-fma4.c = -mfma4
+CFLAGS-s_atan-fma4.c = -mfma4
+CFLAGS-sincos32-fma4.c = -mfma4
+CFLAGS-slowexp-fma4.c = -mfma4
+CFLAGS-slowpow-fma4.c = -mfma4
+CLFAGS-s_sin-fma4.c = -mfma4
+CLFAGS-s_tan-fma4.c = -mfma4
+endif
 endif
diff --git a/sysdeps/x86_64/fpu/multiarch/brandred-fma4.c b/sysdeps/x86_64/fpu/multiarch/brandred-fma4.c
new file mode 100644
index 0000000..93fb5a1
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/brandred-fma4.c
@@ -0,0 +1,3 @@
+#define __branred __branred_fma4
+
+#include <sysdeps/ieee754/dbl-64/branred.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/doasin-fma4.c b/sysdeps/x86_64/fpu/multiarch/doasin-fma4.c
new file mode 100644
index 0000000..d7ba67e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/doasin-fma4.c
@@ -0,0 +1,3 @@
+#define __doasin __doasin_fma4
+
+#include <sysdeps/ieee754/dbl-64/doasin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c b/sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c
new file mode 100644
index 0000000..02b420b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c
@@ -0,0 +1,5 @@
+#define __docos __docos_fma4
+#define __dubcos __dubcos_fma4
+#define __dubsin __dubsin_fma4
+
+#include <sysdeps/ieee754/dbl-64/dosincos.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c
new file mode 100644
index 0000000..938bc84
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c
@@ -0,0 +1,10 @@
+#define __ieee754_acos __ieee754_acos_fma4
+#define __ieee754_asin __ieee754_asin_fma4
+#define __cos32 __cos32_fma4
+#define __doasin __doasin_fma4
+#define __docos __docos_fma4
+#define __dubcos __dubcos_fma4
+#define __dubsin __dubsin_fma4
+#define __sin32 __sin32_fma4
+
+#include <sysdeps/ieee754/dbl-64/e_asin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c
new file mode 100644
index 0000000..8882cea
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c
@@ -0,0 +1,23 @@
+#ifdef HAVE_FMA4_SUPPORT
+# include <init-arch.h>
+# include <math_private.h>
+
+extern double __ieee754_acos_sse2 (double);
+extern double __ieee754_acos_fma4 (double);
+extern double __ieee754_asin_sse2 (double);
+extern double __ieee754_asin_fma4 (double);
+
+libm_ifunc (__ieee754_acos,
+	    HAS_FMA4 ? __ieee754_acos_fma4 : __ieee754_acos_sse2);
+strong_alias (__ieee754_acos, __acos_finite)
+
+libm_ifunc (__ieee754_asin,
+	    HAS_FMA4 ? __ieee754_asin_fma4 : __ieee754_asin_sse2);
+strong_alias (__ieee754_asin, __asin_finite)
+
+# define __ieee754_acos __ieee754_acos_sse2
+# define __ieee754_asin __ieee754_asin_sse2
+#endif
+
+
+#include <sysdeps/ieee754/dbl-64/e_asin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c
new file mode 100644
index 0000000..84a6f86
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c
@@ -0,0 +1,9 @@
+#define __ieee754_atan2 __ieee754_atan2_fma4
+#define __add __add_fma4
+#define __dbl_mp __dbl_mp_fma4
+#define __dvd __dvd_fma4
+#define __mpatan2 __mpatan2_fma4
+#define __mul __mul_fma4
+#define __sub __sub_fma4
+
+#include <sysdeps/ieee754/dbl-64/e_atan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
new file mode 100644
index 0000000..12fc929
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
@@ -0,0 +1,16 @@
+#ifdef HAVE_FMA4_SUPPORT
+# include <init-arch.h>
+# include <math_private.h>
+
+extern double __ieee754_atan2_sse2 (double, double);
+extern double __ieee754_atan2_fma4 (double, double);
+
+libm_ifunc (__ieee754_atan2,
+	    HAS_FMA4 ? __ieee754_atan2_fma4 : __ieee754_atan2_sse2);
+strong_alias (__ieee754_atan2, __atan2_finite)
+
+# define __ieee754_atan2 __ieee754_atan2_sse2
+#endif
+
+
+#include <sysdeps/ieee754/dbl-64/e_atan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c
new file mode 100644
index 0000000..942dfff
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c
@@ -0,0 +1,5 @@
+#define __ieee754_exp __ieee754_exp_fma4
+#define __exp1 __exp1_fma4
+#define __slowexp __slowexp_fma4
+
+#include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
new file mode 100644
index 0000000..fc1096b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
@@ -0,0 +1,15 @@
+#ifdef HAVE_FMA4_SUPPORT
+# include <init-arch.h>
+# include <math_private.h>
+
+extern double __ieee754_exp_sse2 (double);
+extern double __ieee754_exp_fma4 (double);
+
+libm_ifunc (__ieee754_exp, HAS_FMA4 ? __ieee754_exp_fma4 : __ieee754_exp_sse2);
+strong_alias (__ieee754_exp, __exp_finite)
+
+# define __ieee754_exp __ieee754_exp_sse2
+#endif
+
+
+#include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
new file mode 100644
index 0000000..0be66d1
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
@@ -0,0 +1,7 @@
+#define __ieee754_log __ieee754_log_fma4
+#define __mplog __mplog_fma4
+#define __add __add_fma4
+#define __dbl_mp __dbl_mp_fma4
+#define __sub __sub_fma4
+
+#include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
new file mode 100644
index 0000000..c542646
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
@@ -0,0 +1,15 @@
+#ifdef HAVE_FMA4_SUPPORT
+# include <init-arch.h>
+# include <math_private.h>
+
+extern double __ieee754_log_sse2 (double);
+extern double __ieee754_log_fma4 (double);
+
+libm_ifunc (__ieee754_log, HAS_FMA4 ? __ieee754_log_fma4 : __ieee754_log_sse2);
+strong_alias (__ieee754_log, __log_finite)
+
+# define __ieee754_log __ieee754_log_sse2
+#endif
+
+
+#include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
new file mode 100644
index 0000000..20313be
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
@@ -0,0 +1,5 @@
+#define __ieee754_pow __ieee754_pow_fma4
+#define __exp1 __exp1_fma4
+#define __slowpow __slowpow_fma4
+
+#include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
new file mode 100644
index 0000000..a740b6c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
@@ -0,0 +1,15 @@
+#ifdef HAVE_FMA4_SUPPORT
+# include <init-arch.h>
+# include <math_private.h>
+
+extern double __ieee754_pow_sse2 (double, double);
+extern double __ieee754_pow_fma4 (double, double);
+
+libm_ifunc (__ieee754_pow, HAS_FMA4 ? __ieee754_pow_fma4 : __ieee754_pow_sse2);
+strong_alias (__ieee754_pow, __pow_finite)
+
+# define __ieee754_pow __ieee754_pow_sse2
+#endif
+
+
+#include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
new file mode 100644
index 0000000..3fc223e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
@@ -0,0 +1,3 @@
+#define __halfulp __halfulp_fma4
+
+#include <sysdeps/ieee754/dbl-64/halfulp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
new file mode 100644
index 0000000..7b9e2ef
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
@@ -0,0 +1,10 @@
+#define __add __add_fma4
+#define __mul __mul_fma4
+#define __sub __sub_fma4
+#define __dbl_mp __dbl_mp_fma4
+#define __dvd __dvd_fma4
+
+#define NO___CPY 1
+#define NO___MP_DBL 1
+
+#include <sysdeps/ieee754/dbl-64/mpa.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c
new file mode 100644
index 0000000..942974b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c
@@ -0,0 +1,8 @@
+#define __mpatan __mpatan_fma4
+#define __add __add_fma4
+#define __dvd __dvd_fma4
+#define __mpsqrt __mpsqrt_fma4
+#define __mul __mul_fma4
+#define __sub __sub_fma4
+
+#include <sysdeps/ieee754/dbl-64/mpatan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c
new file mode 100644
index 0000000..e7c469e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c
@@ -0,0 +1,8 @@
+#define __mpatan2 __mpatan2_fma4
+#define __add __add_fma4
+#define __dvd __dvd_fma4
+#define __mpatan __mpatan_fma4
+#define __mpsqrt __mpsqrt_fma4
+#define __mul __mul_fma4
+
+#include <sysdeps/ieee754/dbl-64/mpatan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c
new file mode 100644
index 0000000..021970c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c
@@ -0,0 +1,7 @@
+#define __mpexp __mpexp_fma4
+#define __add __add_fma4
+#define __dbl_mp __dbl_mp_fma4
+#define __dvd __dvd_fma4
+#define __mul __mul_fma4
+
+#include <sysdeps/ieee754/dbl-64/mpexp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c b/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c
new file mode 100644
index 0000000..9581eaf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c
@@ -0,0 +1,7 @@
+#define __mplog __mplog_fma4
+#define __add __add_fma4
+#define __mpexp __mpexp_fma4
+#define __mul __mul_fma4
+#define __sub __sub_fma4
+
+#include <sysdeps/ieee754/dbl-64/mplog.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c
new file mode 100644
index 0000000..43b6493
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c
@@ -0,0 +1,6 @@
+#define __mpsqrt __mpsqrt_fma4
+#define __dbl_mp __dbl_mp_fma4
+#define __mul __mul_fma4
+#define __sub __sub_fma4
+
+#include <sysdeps/ieee754/dbl-64/mpsqrt.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c b/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c
new file mode 100644
index 0000000..767924e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c
@@ -0,0 +1,6 @@
+#define __mptan __mptan_fma4
+#define __c32 __c32_fma4
+#define __dvd __dvd_fma4
+#define __mpranred __mpranred_fma4
+
+#include <sysdeps/ieee754/dbl-64/mptan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
new file mode 100644
index 0000000..a8f0977
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
@@ -0,0 +1,8 @@
+#define atan __atan_fma4
+#define __add __add_fma4
+#define __dbl_mp __dbl_mp_fma4
+#define __mpatan __mpatan_fma4
+#define __mul __mul_fma4
+#define __sub __sub_fma4
+
+#include <sysdeps/ieee754/dbl-64/s_atan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
new file mode 100644
index 0000000..ffc4a56
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
@@ -0,0 +1,14 @@
+#ifdef HAVE_FMA4_SUPPORT
+# include <init-arch.h>
+# include <math.h>
+
+extern double __atan_sse2 (double);
+extern double __atan_fma4 (double);
+
+libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : __atan_sse2);
+
+# define atan __atan_sse2
+#endif
+
+
+#include <sysdeps/ieee754/dbl-64/s_atan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c
new file mode 100644
index 0000000..97cef8b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c
@@ -0,0 +1,11 @@
+#define __cos __cos_fma4
+#define __sin __sin_fma4
+#define __branred __branred_fma4
+#define __docos __docos_fma4
+#define __dubsin __dubsin_fma4
+#define __mpcos __mpcos_fma4
+#define __mpcos1 __mpcos1_fma4
+#define __mpsin __mpsin_fma4
+#define __mpsin1 __mpsin1_fma4
+
+#include <sysdeps/ieee754/dbl-64/s_sin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
new file mode 100644
index 0000000..a7c35dc
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
@@ -0,0 +1,22 @@
+#ifdef HAVE_FMA4_SUPPORT
+# include <init-arch.h>
+# include <math.h>
+# undef NAN
+
+extern double __cos_sse2 (double);
+extern double __cos_fma4 (double);
+extern double __sin_sse2 (double);
+extern double __sin_fma4 (double);
+
+libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : __cos_sse2);
+weak_alias (__cos, cos)
+
+libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : __sin_sse2);
+weak_alias (__sin, sin)
+
+# define __cos __cos_sse2
+# define __sin __sin_sse2
+#endif
+
+
+#include <sysdeps/ieee754/dbl-64/s_sin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
new file mode 100644
index 0000000..c3cefc2
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
@@ -0,0 +1,9 @@
+#define tan __tan_fma4
+#define __branred __branred_fma4
+#define __dbl_mp __dbl_mp_fma4
+#define __mpranred __mpranred_fma4
+#define __mptan __mptan_fma4
+#define __sub __sub_fma4
+
+
+#include <sysdeps/ieee754/dbl-64/s_tan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
new file mode 100644
index 0000000..cca02b5
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
@@ -0,0 +1,14 @@
+#ifdef HAVE_FMA4_SUPPORT
+# include <init-arch.h>
+# include <math.h>
+
+extern double __tan_sse2 (double);
+extern double __tan_fma4 (double);
+
+libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : __tan_sse2);
+
+# define tan __tan_sse2
+#endif
+
+
+#include <sysdeps/ieee754/dbl-64/s_tan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c b/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c
new file mode 100644
index 0000000..f0d2d27
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c
@@ -0,0 +1,14 @@
+#define __cos32 __cos32_fma4
+#define __sin32 __sin32_fma4
+#define __c32 __c32_fma4
+#define __mpsin __mpsin_fma4
+#define __mpsin1 __mpsin1_fma4
+#define __mpcos __mpcos_fma4
+#define __mpcos1 __mpcos1_fma4
+#define __mpranred __mpranred_fma4
+#define __add __add_fma4
+#define __dbl_mp __dbl_mp_fma4
+#define __mul __mul_fma4
+#define __sub __sub_fma4
+
+#include <sysdeps/ieee754/dbl-64/sincos32.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c
new file mode 100644
index 0000000..83cb359
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c
@@ -0,0 +1,8 @@
+#define __slowexp __slowexp_fma4
+#define __add __add_fma4
+#define __dbl_mp __dbl_mp_fma4
+#define __mpexp __mpexp_fma4
+#define __mul __mul_fma4
+#define __sub __sub_fma4
+
+#include <sysdeps/ieee754/dbl-64/slowexp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
new file mode 100644
index 0000000..744f3f6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
@@ -0,0 +1,10 @@
+#define __slowpow __slowpow_fma4
+#define __add __add_fma4
+#define __dbl_mp __dbl_mp_fma4
+#define __mpexp __mpexp_fma4
+#define __mplog __mplog_fma4
+#define __mul __mul_fma4
+#define __sub __sub_fma4
+#define __halfulp __halfulp_fma4
+
+#include <sysdeps/ieee754/dbl-64/slowpow.c>
-- 
2.7.4