[libc] Add support for x86-64 targets that do not have FMA instructions.

author Tue Ly <lntue@google.com>

Thu, 7 Apr 2022 20:02:10 +0000 (16:02 -0400)

committer Tue Ly <lntue@google.com>

Fri, 8 Apr 2022 18:12:24 +0000 (14:12 -0400)
author Tue Ly <lntue@google.com>
Thu, 7 Apr 2022 20:02:10 +0000 (16:02 -0400)
committer Tue Ly <lntue@google.com>
Fri, 8 Apr 2022 18:12:24 +0000 (14:12 -0400)
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt

index f1cd0b5..7f1cecc 100644 (file)
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -12,7 +12,6 @@ add_header_library(
      NearestIntegerOperations.h
      NormalFloat.h
      PlatformDefs.h
-    PolyEval.h
      UInt.h
      XFloat.h
    DEPENDS
@@ -34,4 +33,29 @@ add_header_library(
      libc.src.__support.FPUtil.generic.sqrt
  )
  
+add_header_library(
+  fma
+  HDRS
+    FMA.h
+  DEPENDS
+    .fputil
+    libc.src.__support.FPUtil.generic.fma
+)
+
+add_header_library(
+  multiply_add
+  HDRS
+    multiply_add.h
+  DEPENDS
+    .fma
+)
+
+add_header_library(
+  polyeval
+  HDRS
+    PolyEval.h
+  DEPENDS
+    .multiply_add
+)
+
  add_subdirectory(generic)
diff --git a/libc/src/__support/FPUtil/FMA.h b/libc/src/__support/FPUtil/FMA.h

index c735c06..6823dd0 100644 (file)
--- a/libc/src/__support/FPUtil/FMA.h
+++ b/libc/src/__support/FPUtil/FMA.h
@@ -11,11 +11,16 @@
  
  #include "src/__support/architectures.h"
  
+#if defined(LIBC_TARGET_HAS_FMA)
+
  #if defined(LLVM_LIBC_ARCH_X86_64)
  #include "x86_64/FMA.h"
  #elif defined(LLVM_LIBC_ARCH_AARCH64)
  #include "aarch64/FMA.h"
+#endif
+
  #else
+// FMA instructions are not available
  #include "generic/FMA.h"
  #include "src/__support/CPP/TypeTraits.h"
  
diff --git a/libc/src/__support/FPUtil/PolyEval.h b/libc/src/__support/FPUtil/PolyEval.h

index 368ee38..c9e818a 100644 (file)
--- a/libc/src/__support/FPUtil/PolyEval.h
+++ b/libc/src/__support/FPUtil/PolyEval.h
@@ -9,19 +9,15 @@
  #ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_POLYEVAL_H
  #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_POLYEVAL_H
  
-#include "src/__support/CPP/TypeTraits.h"
-#include "src/__support/architectures.h"
+#include "multiply_add.h"
  
  // Evaluate polynomial using Horner's Scheme:
  // With polyeval(x, a_0, a_1, ..., a_n) = a_n * x^n + ... + a_1 * x + a_0, we
  // evaluated it as:  a_0 + x * (a_1 + x * ( ... (a_(n-1) + x * a_n) ... ) ) ).
-// We will use fma instructions if available.
+// We will use FMA instructions if available.
  // Example: to evaluate x^3 + 2*x^2 + 3*x + 4, call
  //   polyeval( x, 4.0, 3.0, 2.0, 1.0 )
  
-#if defined(LLVM_LIBC_ARCH_X86_64) || defined(LLVM_LIBC_ARCH_AARCH64)
-#include "FMA.h"
-
  namespace __llvm_libc {
  namespace fputil {
  
@@ -29,35 +25,10 @@ template <typename T> static inline T polyeval(T x, T a0) { return a0; }
  
  template <typename T, typename... Ts>
  INLINE_FMA static inline T polyeval(T x, T a0, Ts... a) {
-  return fma(x, polyeval(x, a...), a0);
+  return multiply_add(x, polyeval(x, a...), a0);
  }
  
  } // namespace fputil
  } // namespace __llvm_libc
  
-#ifdef LLVM_LIBC_ARCH_X86_64
-
-// [DISABLED] There is a regression with using vectorized version for polyeval
-// compared to the naive Horner's scheme with fma.  Need further investigation
-// #include "x86_64/PolyEval.h"
-
-#endif // LLVM_LIBC_ARCH_X86_64
-
-#else
-
-namespace __llvm_libc {
-namespace fputil {
-
-template <typename T> static inline T polyeval(T x, T a0) { return a0; }
-
-template <typename T, typename... Ts>
-static inline T polyeval(T x, T a0, Ts... a) {
-  return x * polyeval(x, a...) + a0;
-}
-
-} // namespace fputil
-} // namespace __llvm_libc
-
-#endif
-
-#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_FMA_H
+#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_POLYEVAL_H
diff --git a/libc/src/__support/FPUtil/aarch64/FMA.h b/libc/src/__support/FPUtil/aarch64/FMA.h

index c236c9a..ed637c8 100644 (file)
--- a/libc/src/__support/FPUtil/aarch64/FMA.h
+++ b/libc/src/__support/FPUtil/aarch64/FMA.h
@@ -15,6 +15,10 @@
  #error "Invalid include"
  #endif
  
+#if !defined(LIBC_TARGET_HAS_FMA)
+#error "FMA instructions are not supported"
+#endif
+
  #include "src/__support/CPP/TypeTraits.h"
  
  namespace __llvm_libc {
diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt

index bf69e7d..a755e76 100644 (file)
--- a/libc/src/__support/FPUtil/generic/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/generic/CMakeLists.txt
@@ -4,3 +4,9 @@ add_header_library(
      sqrt.h
      sqrt_80_bit_long_double.h
  )
+
+add_header_library(
+  fma
+  HDRS
+    FMA.h
+)
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h

index efdd8b7..78b640c 100644 (file)
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -10,6 +10,7 @@
  #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMA_H
  
  #include "src/__support/CPP/TypeTraits.h"
+#include "src/__support/FPUtil/FPBits.h"
  
  namespace __llvm_libc {
  namespace fputil {
diff --git a/libc/src/__support/FPUtil/multiply_add.h b/libc/src/__support/FPUtil/multiply_add.h

new file mode 100644 (file)

index 0000000..8f5da22
--- /dev/null
+++ b/libc/src/__support/FPUtil/multiply_add.h
@@ -0,0 +1,47 @@
+//===-- Common header for multiply-add implementations ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_MULTIPLY_ADD_H
+#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_MULTIPLY_ADD_H
+
+#include "src/__support/architectures.h"
+
+#if defined(LIBC_TARGET_HAS_FMA)
+// FMA instructions are available.  Include FMA.h at file scope: it opens
+// namespace __llvm_libc itself and pulls in system headers, so including it
+// inside the namespaces below would nest those declarations incorrectly.
+#include "FMA.h"
+#endif // LIBC_TARGET_HAS_FMA
+
+namespace __llvm_libc {
+namespace fputil {
+
+// Implement a simple wrapper for multiply-add operation:
+//   multiply_add(x, y, z) = x*y + z
+// which uses FMA instructions to speed up if available.
+
+// Generic version: a plain multiply followed by an add.
+template <typename T> static inline T multiply_add(T x, T y, T z) {
+  return x * y + z;
+}
+
+#if defined(LIBC_TARGET_HAS_FMA)
+// Specializations for float and double forward to fputil::fma.
+template <> inline float multiply_add<float>(float x, float y, float z) {
+  return fma(x, y, z);
+}
+
+template <> inline double multiply_add<double>(double x, double y, double z) {
+  return fma(x, y, z);
+}
+#endif // LIBC_TARGET_HAS_FMA
+
+} // namespace fputil
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_MULTIPLY_ADD_H
diff --git a/libc/src/__support/FPUtil/x86_64/FMA.h b/libc/src/__support/FPUtil/x86_64/FMA.h

index 70ebe38..08de6da 100644 (file)
--- a/libc/src/__support/FPUtil/x86_64/FMA.h
+++ b/libc/src/__support/FPUtil/x86_64/FMA.h
@@ -15,6 +15,10 @@
  #error "Invalid include"
  #endif
  
+#if !defined(LIBC_TARGET_HAS_FMA)
+#error "FMA instructions are not supported"
+#endif
+
  #include "src/__support/CPP/TypeTraits.h"
  #include <immintrin.h>
  
diff --git a/libc/src/__support/architectures.h b/libc/src/__support/architectures.h

index 14eb1a5..70eeb99 100644 (file)
--- a/libc/src/__support/architectures.h
+++ b/libc/src/__support/architectures.h
@@ -37,7 +37,15 @@
  #define LLVM_LIBC_ARCH_ANY_ARM
  #endif
  
-#if defined(LLVM_LIBC_ARCH_X86_64)
+#if defined(LLVM_LIBC_ARCH_AARCH64)
+#define LIBC_TARGET_HAS_FMA
+#elif defined(LLVM_LIBC_ARCH_X86_64)
+#if (defined(__AVX2__) || defined(__FMA__))
+#define LIBC_TARGET_HAS_FMA
+#endif
+#endif
+
+#if (defined(LLVM_LIBC_ARCH_X86_64) && defined(LIBC_TARGET_HAS_FMA))
  #define INLINE_FMA __attribute__((target("fma")))
  #else
  #define INLINE_FMA
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt

index 9737f4a..8fc550d 100644 (file)
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -48,8 +48,9 @@ add_entrypoint_object(
      fmaf.h
    DEPENDS
      libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.fma
    COMPILE_OPTIONS
-    -O2
+    -O3
      -mfma
  )
  
@@ -61,8 +62,9 @@ add_entrypoint_object(
      fma.h
    DEPENDS
      libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.fma
    COMPILE_OPTIONS
-    -O2
+    -O3
      -mfma
  )
  
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt

index 73957cf..6a96b55 100644 (file)
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -478,6 +478,7 @@ add_entrypoint_object(
    DEPENDS
      .common_constants
      libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.polyeval
      libc.include.math
    COMPILE_OPTIONS
      -O3
@@ -492,6 +493,7 @@ add_entrypoint_object(
      ../exp2f.h
    DEPENDS
      libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.polyeval
      libc.include.math
    COMPILE_OPTIONS
      -O3
@@ -507,6 +509,8 @@ add_entrypoint_object(
    DEPENDS
      .common_constants
      libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
      libc.include.math
    COMPILE_OPTIONS
      -O3
@@ -674,6 +678,8 @@ add_entrypoint_object(
    DEPENDS
      .common_constants
      libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
    COMPILE_OPTIONS
      -O3
      -mfma
@@ -688,6 +694,8 @@ add_entrypoint_object(
    DEPENDS
      .common_constants
      libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
    COMPILE_OPTIONS
      -O3
      -mfma
@@ -702,6 +710,7 @@ add_entrypoint_object(
    DEPENDS
      .common_constants
      libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.polyeval
      COMPILE_OPTIONS
      -O3
      -mfma
@@ -716,6 +725,8 @@ add_entrypoint_object(
    DEPENDS
      .common_constants
      libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
    COMPILE_OPTIONS
      -O3
      -mfma
diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp

index b0544b7..76232d6 100644 (file)
--- a/libc/src/math/generic/expm1f.cpp
+++ b/libc/src/math/generic/expm1f.cpp
@@ -83,7 +83,7 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
        //   = x otherwise.
        // To simplify the rounding decision and make it more efficient, we use
        //   fma(x, x, x) ~ x + x^2 instead.
-      return fputil::fma(x, x, x);
+      return fputil::multiply_add(x, x, x);
      }
  
      // 2^-25 <= |x| < 2^-4
@@ -96,7 +96,7 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
          fputil::polyeval(xd, 0x1p-1, 0x1.55555555557ddp-3, 0x1.55555555552fap-5,
                           0x1.111110fcd58b7p-7, 0x1.6c16c1717660bp-10,
                           0x1.a0241f0006d62p-13, 0x1.a01e3f8d3c06p-16);
-    return static_cast<float>(fputil::fma(r, xsq, xd));
+    return static_cast<float>(fputil::multiply_add(r, xsq, xd));
    }
  
    // For -18 < x < 89, to compute expm1(x), we perform the following range
@@ -132,7 +132,7 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
    double exp_lo =
        fputil::polyeval(xd, 0x1.0p0, 0x1.ffffffffff777p-1, 0x1.000000000071cp-1,
                         0x1.555566668e5e7p-3, 0x1.55555555ef243p-5);
-  return static_cast<float>(fputil::fma(exp_hi_mid, exp_lo, -1.0));
+  return static_cast<float>(fputil::multiply_add(exp_hi_mid, exp_lo, -1.0));
  }
  
  } // namespace __llvm_libc
diff --git a/libc/src/math/generic/log10f.cpp b/libc/src/math/generic/log10f.cpp

index 59ca659..878ae68 100644 (file)
--- a/libc/src/math/generic/log10f.cpp
+++ b/libc/src/math/generic/log10f.cpp
@@ -170,7 +170,7 @@ LLVM_LIBC_FUNCTION(float, log10f, (float x)) {
    double d = static_cast<float>(xbits) - static_cast<float>(f);
    d *= ONE_OVER_F[f_index];
  
-  double extra_factor = fputil::fma(m, LOG10_2, LOG10_F[f_index]);
+  double extra_factor = fputil::multiply_add(m, LOG10_2, LOG10_F[f_index]);
  
    double r = fputil::polyeval(d, extra_factor, 0x1.bcb7b1526e4c5p-2,
                                -0x1.bcb7b1518a5e9p-3, 0x1.287a72a6f716p-3,
diff --git a/libc/src/math/generic/log1pf.cpp b/libc/src/math/generic/log1pf.cpp

index 7d1e71e..6e8c678 100644 (file)
--- a/libc/src/math/generic/log1pf.cpp
+++ b/libc/src/math/generic/log1pf.cpp
@@ -66,7 +66,7 @@ INLINE_FMA static inline float log(double x) {
    double d = static_cast<double>(xbits) - static_cast<double>(f);
    d *= ONE_OVER_F[f_index];
  
-  double extra_factor = fputil::fma(m, LOG_2, LOG_F[f_index]);
+  double extra_factor = fputil::multiply_add(m, LOG_2, LOG_F[f_index]);
  
    double r = fputil::polyeval(d, extra_factor, 0x1.fffffffffffacp-1,
                                -0x1.fffffffef9cb2p-2, 0x1.5555513bc679ap-2,
@@ -161,7 +161,7 @@ LLVM_LIBC_FUNCTION(float, log1pf, (float x)) {
    // > fpminimax(log(1 + x)/x, 5, [|D...|], [-2^-8; 2^-8]);
    r = fputil::polyeval(xd, -0x1p-1, 0x1.5555555515551p-2, -0x1.ffffffff82bdap-3,
                         0x1.999b33348d3aep-3, -0x1.5556cae3adcc3p-3);
-  return static_cast<float>(fputil::fma(r, xd * xd, xd));
+  return static_cast<float>(fputil::multiply_add(r, xd * xd, xd));
  }
  
  } // namespace __llvm_libc
diff --git a/libc/src/math/generic/logf.cpp b/libc/src/math/generic/logf.cpp

index 3e71237..747f8c7 100644 (file)
--- a/libc/src/math/generic/logf.cpp
+++ b/libc/src/math/generic/logf.cpp
@@ -120,7 +120,7 @@ LLVM_LIBC_FUNCTION(float, logf, (float x)) {
    d *= ONE_OVER_F[f_index];
  
    double extra_factor =
-      fputil::fma(static_cast<double>(m), LOG_2, LOG_F[f_index]);
+      fputil::multiply_add(static_cast<double>(m), LOG_2, LOG_F[f_index]);
  
    double r = __llvm_libc::fputil::polyeval(
        d, extra_factor, 0x1.fffffffffffacp-1, -0x1.fffffffef9cb2p-2,
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel

index ad8bcd0..93c45b9 100644 (file)
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -196,6 +196,54 @@ cc_library(
      ],
  )
  
+fma_common_hdrs = [
+    "src/__support/FPUtil/FMA.h",
+    "src/__support/FPUtil/generic/FMA.h",
+]
+
+fma_hdrs = selects.with_or({
+    "//conditions:default": fma_common_hdrs,
+    PLATFORM_CPU_X86_64: fma_common_hdrs + [
+        "src/__support/FPUtil/x86_64/FMA.h",
+    ],
+    PLATFORM_CPU_ARM64: fma_common_hdrs + [
+        "src/__support/FPUtil/aarch64/FMA.h",
+    ],
+})
+
+cc_library(
+    name = "__support_fputil_fma",
+    hdrs = fma_hdrs,
+    deps = [
+        ":__support_common",
+        ":__support_cpp_bit",
+        ":__support_cpp_type_traits",
+        ":__support_fputil",
+        ":libc_root",
+    ],
+)
+
+cc_library(
+    name = "__support_fputil_multiply_add",
+    hdrs = [
+        "src/__support/FPUtil/multiply_add.h",
+    ],
+    deps = [
+        ":__support_common",
+        ":__support_fputil_fma",
+    ],
+)
+
+cc_library(
+    name = "__support_fputil_polyeval",
+    hdrs = [
+        "src/__support/FPUtil/PolyEval.h",
+    ],
+    deps = [
+        ":__support_fputil_multiply_add",
+    ],
+)
+
  ################################ fenv targets ################################
  
  libc_function(
author	Tue Ly <lntue@google.com>
	Thu, 7 Apr 2022 20:02:10 +0000 (16:02 -0400)
committer	Tue Ly <lntue@google.com>
	Fri, 8 Apr 2022 18:12:24 +0000 (14:12 -0400)
libc/src/__support/FPUtil/CMakeLists.txt		patch | blob | history
libc/src/__support/FPUtil/FMA.h		patch | blob | history
libc/src/__support/FPUtil/PolyEval.h		patch | blob | history
libc/src/__support/FPUtil/aarch64/FMA.h		patch | blob | history
libc/src/__support/FPUtil/generic/CMakeLists.txt		patch | blob | history
libc/src/__support/FPUtil/generic/FMA.h		patch | blob | history
libc/src/__support/FPUtil/multiply_add.h	[new file with mode: 0644]	patch | blob
libc/src/__support/FPUtil/x86_64/FMA.h		patch | blob | history
libc/src/__support/architectures.h		patch | blob | history
libc/src/math/CMakeLists.txt		patch | blob | history
libc/src/math/generic/CMakeLists.txt		patch | blob | history
libc/src/math/generic/expm1f.cpp		patch | blob | history
libc/src/math/generic/log10f.cpp		patch | blob | history
libc/src/math/generic/log1pf.cpp		patch | blob | history
libc/src/math/generic/logf.cpp		patch | blob | history
utils/bazel/llvm-project-overlay/libc/BUILD.bazel		patch | blob | history