[libc] Add ADD_FMA_FLAG macro to add -mfma flag to functions that require it.

author Tue Ly <lntue@google.com>

Sat, 11 Dec 2021 04:01:22 +0000 (23:01 -0500)

committer Tue Ly <lntue@google.com>

Sat, 11 Dec 2021 21:21:33 +0000 (16:21 -0500)
author Tue Ly <lntue@google.com>
Sat, 11 Dec 2021 04:01:22 +0000 (23:01 -0500)
committer Tue Ly <lntue@google.com>
Sat, 11 Dec 2021 21:21:33 +0000 (16:21 -0500)
diff --git a/libc/src/__support/FPUtil/PolyEval.h b/libc/src/__support/FPUtil/PolyEval.h

index ead8ca87c2cc993f380ad3c5cd687e3741210830..54ba35f222471b2bc782f2b9744f3158bfd34fdb 100644 (file)
--- a/libc/src/__support/FPUtil/PolyEval.h
+++ b/libc/src/__support/FPUtil/PolyEval.h
@@ -28,7 +28,7 @@ namespace fputil {
  template <typename T> static inline T polyeval(T x, T a0) { return a0; }
  
  template <typename T, typename... Ts>
-static inline T polyeval(T x, T a0, Ts... a) {
+INLINE_FMA static inline T polyeval(T x, T a0, Ts... a) {
    return fma(x, polyeval(x, a...), a0);
  }
  
diff --git a/libc/src/__support/FPUtil/x86_64/FMA.h b/libc/src/__support/FPUtil/x86_64/FMA.h

index db0a18a1ca6b0c16f52e40fc46499c898137955c..70ebe382e841aebe96edfb83c297c6fa7afa364b 100644 (file)
--- a/libc/src/__support/FPUtil/x86_64/FMA.h
+++ b/libc/src/__support/FPUtil/x86_64/FMA.h
@@ -11,7 +11,7 @@
  
  #include "src/__support/architectures.h"
  
-#if !defined(LLVM_LIBC_ARCH_X86)
+#if !defined(LLVM_LIBC_ARCH_X86_64)
  #error "Invalid include"
  #endif
  
@@ -22,8 +22,7 @@ namespace __llvm_libc {
  namespace fputil {
  
  template <typename T>
-__attribute__((target(
-    "fma"))) static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T>
+INLINE_FMA static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T>
  fma(T x, T y, T z) {
    float result;
    __m128 xmm = _mm_load_ss(&x);           // NOLINT
@@ -35,8 +34,7 @@ fma(T x, T y, T z) {
  }
  
  template <typename T>
-__attribute__((target(
-    "fma"))) static inline cpp::EnableIfType<cpp::IsSame<T, double>::Value, T>
+INLINE_FMA static inline cpp::EnableIfType<cpp::IsSame<T, double>::Value, T>
  fma(T x, T y, T z) {
    double result;
    __m128d xmm = _mm_load_sd(&x);           // NOLINT
diff --git a/libc/src/__support/FPUtil/x86_64/PolyEval.h b/libc/src/__support/FPUtil/x86_64/PolyEval.h

index 43153f3c1eaf32d26f82a3a118c9a5fc357c0ac9..49e32b5100d257a2913eca7141fa14fe84c0e65b 100644 (file)
--- a/libc/src/__support/FPUtil/x86_64/PolyEval.h
+++ b/libc/src/__support/FPUtil/x86_64/PolyEval.h
@@ -23,11 +23,11 @@ namespace fputil {
  // Cubic polynomials:
  //   polyeval(x, a0, a1, a2, a3) = a3*x^3 + a2*x^2 + a1*x + a0
  template <>
-__attribute__((target("fma"))) inline float
-polyeval(float x, float a0, float a1, float a2, float a3) {
-  __m128 xmm = _mm_set1_ps(x);                 // NOLINT
-  __m128 a13 = _mm_set_ps(0.0f, x, a3, a1);    // NOLINT
-  __m128 a02 = _mm_set_ps(0.0f, 0.0f, a2, a0); // NOLINT
+INLINE_FMA inline float polyeval(float x, float a0, float a1, float a2,
+                                 float a3) {
+  __m128 xmm = _mm_set1_ps(x);
+  __m128 a13 = _mm_set_ps(0.0f, x, a3, a1);
+  __m128 a02 = _mm_set_ps(0.0f, 0.0f, a2, a0);
    // r = (0, x^2, a3*x + a2, a1*x + a0)
    __m128 r = _mm_fmadd_ps(a13, xmm, a02); // NOLINT
    // result = (a3*x + a2) * x^2 + (a1*x + a0)
@@ -35,11 +35,11 @@ polyeval(float x, float a0, float a1, float a2, float a3) {
  }
  
  template <>
-__attribute__((target("fma"))) inline double
-polyeval(double x, double a0, double a1, double a2, double a3) {
-  __m256d xmm = _mm256_set1_pd(x);               // NOLINT
-  __m256d a13 = _mm256_set_pd(0.0, x, a3, a1);   // NOLINT
-  __m256d a02 = _mm256_set_pd(0.0, 0.0, a2, a0); // NOLINT
+INLINE_FMA inline double polyeval(double x, double a0, double a1, double a2,
+                                  double a3) {
+  __m256d xmm = _mm256_set1_pd(x);
+  __m256d a13 = _mm256_set_pd(0.0, x, a3, a1);
+  __m256d a02 = _mm256_set_pd(0.0, 0.0, a2, a0);
    // r = (0, x^2, a3*x + a2, a1*x + a0)
    __m256d r = _mm256_fmadd_pd(a13, xmm, a02); // NOLINT
    // result = (a3*x + a2) * x^2 + (a1*x + a0)
@@ -50,12 +50,12 @@ polyeval(double x, double a0, double a1, double a2, double a3) {
  //   polyeval(x, a0, a1, a2, a3, a4, a5) = a5*x^5 + a4*x^4 + a3*x^3 + a2*x^2 +
  //                                         + a1*x + a0
  template <>
-__attribute__((target("fma"))) inline float
-polyeval(float x, float a0, float a1, float a2, float a3, float a4, float a5) {
-  __m128 xmm = _mm_set1_ps(x);                 // NOLINT
-  __m128 a25 = _mm_set_ps(0.0f, x, a5, a2);    // NOLINT
-  __m128 a14 = _mm_set_ps(0.0f, 0.0f, a4, a1); // NOLINT
-  __m128 a03 = _mm_set_ps(0.0f, 0.0f, a3, a0); // NOLINT
+INLINE_FMA inline float polyeval(float x, float a0, float a1, float a2,
+                                 float a3, float a4, float a5) {
+  __m128 xmm = _mm_set1_ps(x);
+  __m128 a25 = _mm_set_ps(0.0f, x, a5, a2);
+  __m128 a14 = _mm_set_ps(0.0f, 0.0f, a4, a1);
+  __m128 a03 = _mm_set_ps(0.0f, 0.0f, a3, a0);
    // r1 = (0, x^2, a5*x + a4, a2*x + a1)
    __m128 r1 = _mm_fmadd_ps(a25, xmm, a14); // NOLINT
    // r2 = (0, x^3, (a5*x + a4)*x + a3, (a2*x + a1)*x + a0)
@@ -65,13 +65,12 @@ polyeval(float x, float a0, float a1, float a2, float a3, float a4, float a5) {
  }
  
  template <>
-__attribute__((target("fma"))) inline double
-polyeval(double x, double a0, double a1, double a2, double a3, double a4,
-         double a5) {
-  __m256d xmm = _mm256_set1_pd(x);               // NOLINT
-  __m256d a25 = _mm256_set_pd(0.0, x, a5, a2);   // NOLINT
-  __m256d a14 = _mm256_set_pd(0.0, 0.0, a4, a1); // NOLINT
-  __m256d a03 = _mm256_set_pd(0.0, 0.0, a3, a0); // NOLINT
+INLINE_FMA inline double polyeval(double x, double a0, double a1, double a2,
+                                  double a3, double a4, double a5) {
+  __m256d xmm = _mm256_set1_pd(x);
+  __m256d a25 = _mm256_set_pd(0.0, x, a5, a2);
+  __m256d a14 = _mm256_set_pd(0.0, 0.0, a4, a1);
+  __m256d a03 = _mm256_set_pd(0.0, 0.0, a3, a0);
    // r1 = (0, x^2, a5*x + a4, a2*x + a1)
    __m256d r1 = _mm256_fmadd_pd(a25, xmm, a14); // NOLINT
    // r2 = (0, x^3, (a5*x + a4)*x + a3, (a2*x + a1)*x + a0)
diff --git a/libc/src/__support/architectures.h b/libc/src/__support/architectures.h

index 4975d229422f6da2bd2dc02d09f2605a54bb25dd..14eb1a586463f1f6c68481a36d9c5d8f8962bc5b 100644 (file)
--- a/libc/src/__support/architectures.h
+++ b/libc/src/__support/architectures.h
@@ -37,4 +37,10 @@
  #define LLVM_LIBC_ARCH_ANY_ARM
  #endif
  
+#if defined(LLVM_LIBC_ARCH_X86_64)
+#define INLINE_FMA __attribute__((target("fma")))
+#else
+#define INLINE_FMA
+#endif // LLVM_LIBC_ARCH_X86_64
+
  #endif // LLVM_LIBC_SUPPORT_ARCHITECTURES_H
diff --git a/libc/src/math/fma.cpp b/libc/src/math/fma.cpp

index 22aa20e78396e9b834a5bc0a410654ccbdd513ce..4d817786660388fb7df61a384cee3a331387be5e 100644 (file)
--- a/libc/src/math/fma.cpp
+++ b/libc/src/math/fma.cpp
@@ -13,6 +13,7 @@
  
  namespace __llvm_libc {
  
+INLINE_FMA
  LLVM_LIBC_FUNCTION(double, fma, (double x, double y, double z)) {
    return fputil::fma(x, y, z);
  }
diff --git a/libc/src/math/fmaf.cpp b/libc/src/math/fmaf.cpp

index 30074b8ca4a0bf519c2cac09025bf047222f26e5..d102fa5f685ceaa9bdefb273f7676ee5c0b07b45 100644 (file)
--- a/libc/src/math/fmaf.cpp
+++ b/libc/src/math/fmaf.cpp
@@ -13,6 +13,7 @@
  
  namespace __llvm_libc {
  
+INLINE_FMA
  LLVM_LIBC_FUNCTION(float, fmaf, (float x, float y, float z)) {
    return fputil::fma(x, y, z);
  }
diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp

index cb3ba325c8f961f93927495094c1b3595aeb7ad7..578a359cdb5753513f9bebc660deaff9e7e959a5 100644 (file)
--- a/libc/src/math/generic/expm1f.cpp
+++ b/libc/src/math/generic/expm1f.cpp
@@ -22,6 +22,7 @@ namespace __llvm_libc {
  // each interval. The coefficients were generated by Sollya's fpminmax.
  //
  // See libc/utils/mathtools/expm1f.sollya for more detail.
+INLINE_FMA
  LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
    const float ln2 =
        0.69314718055994530941723212145817656807550013436025f; // For C++17:
author	Tue Ly <lntue@google.com>
	Sat, 11 Dec 2021 04:01:22 +0000 (23:01 -0500)
committer	Tue Ly <lntue@google.com>
	Sat, 11 Dec 2021 21:21:33 +0000 (16:21 -0500)
libc/src/__support/FPUtil/PolyEval.h		patch \| blob \| history
libc/src/__support/FPUtil/x86_64/FMA.h		patch \| blob \| history
libc/src/__support/FPUtil/x86_64/PolyEval.h		patch \| blob \| history
libc/src/__support/architectures.h		patch \| blob \| history
libc/src/math/fma.cpp		patch \| blob \| history
libc/src/math/fmaf.cpp		patch \| blob \| history
libc/src/math/generic/expm1f.cpp		patch \| blob \| history