NearestIntegerOperations.h
NormalFloat.h
PlatformDefs.h
- PolyEval.h
UInt.h
XFloat.h
DEPENDS
libc.src.__support.FPUtil.generic.sqrt
)
+# Header libraries for the multiply-add helpers. Their DEPENDS entries form a
+# chain: polyeval -> multiply_add -> fma -> (.fputil and
+# libc.src.__support.FPUtil.generic.fma).
+add_header_library(
+ fma
+ HDRS
+ FMA.h
+ DEPENDS
+ .fputil
+ libc.src.__support.FPUtil.generic.fma
+)
+
+# multiply_add.h only needs the fma header library declared above.
+add_header_library(
+ multiply_add
+ HDRS
+ multiply_add.h
+ DEPENDS
+ .fma
+)
+
+# PolyEval.h is built as its own header library on top of multiply_add.
+add_header_library(
+ polyeval
+ HDRS
+ PolyEval.h
+ DEPENDS
+ .multiply_add
+)
+
add_subdirectory(generic)
#include "src/__support/architectures.h"
+#if defined(LIBC_TARGET_HAS_FMA)
+
#if defined(LLVM_LIBC_ARCH_X86_64)
#include "x86_64/FMA.h"
#elif defined(LLVM_LIBC_ARCH_AARCH64)
#include "aarch64/FMA.h"
+#endif
+
#else
+// FMA instructions are not available
#include "generic/FMA.h"
#include "src/__support/CPP/TypeTraits.h"
#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_POLYEVAL_H
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_POLYEVAL_H
-#include "src/__support/CPP/TypeTraits.h"
-#include "src/__support/architectures.h"
+#include "multiply_add.h"
// Evaluate polynomial using Horner's Scheme:
// With polyeval(x, a_0, a_1, ..., a_n) = a_n * x^n + ... + a_1 * x + a_0, we
// evaluate it as: a_0 + x * (a_1 + x * ( ... (a_(n-1) + x * a_n) ... )).
-// We will use fma instructions if available.
+// We will use FMA instructions if available.
// Example: to evaluate x^3 + 2*x^2 + 3*x + 4, call
// polyeval( x, 4.0, 3.0, 2.0, 1.0 )
-#if defined(LLVM_LIBC_ARCH_X86_64) || defined(LLVM_LIBC_ARCH_AARCH64)
-#include "FMA.h"
-
namespace __llvm_libc {
namespace fputil {
// Recursive step of Horner's scheme: peels off the lowest-order coefficient a0
// and returns x * polyeval(x, a...) + a0, where a... holds the remaining
// higher-order coefficients. The recursion ends in the single-coefficient
// overload polyeval(x, a0), which is not shown in this hunk.
template <typename T, typename... Ts>
INLINE_FMA static inline T polyeval(T x, T a0, Ts... a) {
- return fma(x, polyeval(x, a...), a0);
+ return multiply_add(x, polyeval(x, a...), a0);
}
} // namespace fputil
} // namespace __llvm_libc
-#ifdef LLVM_LIBC_ARCH_X86_64
-
-// [DISABLED] There is a regression with using vectorized version for polyeval
-// compared to the naive Horner's scheme with fma. Need further investigation
-// #include "x86_64/PolyEval.h"
-
-#endif // LLVM_LIBC_ARCH_X86_64
-
-#else
-
-namespace __llvm_libc {
-namespace fputil {
-
-template <typename T> static inline T polyeval(T x, T a0) { return a0; }
-
-template <typename T, typename... Ts>
-static inline T polyeval(T x, T a0, Ts... a) {
- return x * polyeval(x, a...) + a0;
-}
-
-} // namespace fputil
-} // namespace __llvm_libc
-
-#endif
-
-#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_FMA_H
+#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_POLYEVAL_H
#error "Invalid include"
#endif
+#if !defined(LIBC_TARGET_HAS_FMA)
+#error "FMA instructions are not supported"
+#endif
+
#include "src/__support/CPP/TypeTraits.h"
namespace __llvm_libc {
sqrt.h
sqrt_80_bit_long_double.h
)
+
+# Header library exposing this directory's FMA.h. No DEPENDS are declared.
+# NOTE(review): presumably this is the generic/ directory, whose FMA.h includes
+# TypeTraits.h and FPBits.h -- confirm whether those targets should be listed.
+add_header_library(
+ fma
+ HDRS
+ FMA.h
+)
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMA_H
#include "src/__support/CPP/TypeTraits.h"
+#include "src/__support/FPUtil/FPBits.h"
namespace __llvm_libc {
namespace fputil {
--- /dev/null
+//===-- Common header for multiply-add implementations ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_MULTIPLY_ADD_H
+#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_MULTIPLY_ADD_H
+
+#include "src/__support/architectures.h"
+
+namespace __llvm_libc {
+namespace fputil {
+
+// Simple wrapper for the multiply-add operation:
+// multiply_add(x, y, z) = x * y + z
+// The generic template below evaluates that expression directly. When
+// LIBC_TARGET_HAS_FMA is defined, the float and double specializations further
+// down forward to fma() from FMA.h instead.
+
+template <typename T> static inline T multiply_add(T x, T y, T z) {
+ return x * y + z;
+}
+
+} // namespace fputil
+} // namespace __llvm_libc
+
+#if defined(LIBC_TARGET_HAS_FMA)
+// FMA instructions are available.
+// FMA.h has to be included at global scope. The headers it pulls in open
+// namespace __llvm_libc themselves and include further headers, so including
+// it from inside __llvm_libc::fputil would nest all of those declarations one
+// level too deep whenever FMA.h was not already included earlier.
+#include "FMA.h"
+
+namespace __llvm_libc {
+namespace fputil {
+
+// float specialization: forwards to fma().
+template <> inline float multiply_add<float>(float x, float y, float z) {
+ return fma(x, y, z);
+}
+
+// double specialization: forwards to fma().
+template <> inline double multiply_add<double>(double x, double y, double z) {
+ return fma(x, y, z);
+}
+
+} // namespace fputil
+} // namespace __llvm_libc
+#endif // LIBC_TARGET_HAS_FMA
+
+#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_MULTIPLY_ADD_H
#error "Invalid include"
#endif
+#if !defined(LIBC_TARGET_HAS_FMA)
+#error "FMA instructions are not supported"
+#endif
+
#include "src/__support/CPP/TypeTraits.h"
#include <immintrin.h>
#define LLVM_LIBC_ARCH_ANY_ARM
#endif
-#if defined(LLVM_LIBC_ARCH_X86_64)
+// LIBC_TARGET_HAS_FMA marks targets on which FMA instructions may be used. It
+// is always defined for AArch64, and for x86-64 only when __AVX2__ or __FMA__
+// is defined.
+// NOTE(review): __AVX2__ alone is accepted as evidence of FMA support here --
+// confirm that every AVX2 configuration this is built for also provides FMA.
+#if defined(LLVM_LIBC_ARCH_AARCH64)
+#define LIBC_TARGET_HAS_FMA
+#elif defined(LLVM_LIBC_ARCH_X86_64)
+#if (defined(__AVX2__) || defined(__FMA__))
+#define LIBC_TARGET_HAS_FMA
+#endif
+#endif
+
+#if (defined(LLVM_LIBC_ARCH_X86_64) && defined(LIBC_TARGET_HAS_FMA))
#define INLINE_FMA __attribute__((target("fma")))
#else
#define INLINE_FMA
fmaf.h
DEPENDS
libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.fma
COMPILE_OPTIONS
- -O2
+ -O3
-mfma
)
fma.h
DEPENDS
libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.fma
COMPILE_OPTIONS
- -O2
+ -O3
-mfma
)
DEPENDS
.common_constants
libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.polyeval
libc.include.math
COMPILE_OPTIONS
-O3
../exp2f.h
DEPENDS
libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.polyeval
libc.include.math
COMPILE_OPTIONS
-O3
DEPENDS
.common_constants
libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.polyeval
libc.include.math
COMPILE_OPTIONS
-O3
DEPENDS
.common_constants
libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.polyeval
COMPILE_OPTIONS
-O3
-mfma
DEPENDS
.common_constants
libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.polyeval
COMPILE_OPTIONS
-O3
-mfma
DEPENDS
.common_constants
libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.polyeval
COMPILE_OPTIONS
-O3
-mfma
DEPENDS
.common_constants
libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.polyeval
COMPILE_OPTIONS
-O3
-mfma
// = x otherwise.
// To simplify the rounding decision and make it more efficient, we use
// fma(x, x, x) ~ x + x^2 instead.
- return fputil::fma(x, x, x);
+ return fputil::multiply_add(x, x, x);
}
// 2^-25 <= |x| < 2^-4
fputil::polyeval(xd, 0x1p-1, 0x1.55555555557ddp-3, 0x1.55555555552fap-5,
0x1.111110fcd58b7p-7, 0x1.6c16c1717660bp-10,
0x1.a0241f0006d62p-13, 0x1.a01e3f8d3c06p-16);
- return static_cast<float>(fputil::fma(r, xsq, xd));
+ return static_cast<float>(fputil::multiply_add(r, xsq, xd));
}
// For -18 < x < 89, to compute expm1(x), we perform the following range
double exp_lo =
fputil::polyeval(xd, 0x1.0p0, 0x1.ffffffffff777p-1, 0x1.000000000071cp-1,
0x1.555566668e5e7p-3, 0x1.55555555ef243p-5);
- return static_cast<float>(fputil::fma(exp_hi_mid, exp_lo, -1.0));
+ return static_cast<float>(fputil::multiply_add(exp_hi_mid, exp_lo, -1.0));
}
} // namespace __llvm_libc
double d = static_cast<float>(xbits) - static_cast<float>(f);
d *= ONE_OVER_F[f_index];
- double extra_factor = fputil::fma(m, LOG10_2, LOG10_F[f_index]);
+ double extra_factor = fputil::multiply_add(m, LOG10_2, LOG10_F[f_index]);
double r = fputil::polyeval(d, extra_factor, 0x1.bcb7b1526e4c5p-2,
-0x1.bcb7b1518a5e9p-3, 0x1.287a72a6f716p-3,
double d = static_cast<double>(xbits) - static_cast<double>(f);
d *= ONE_OVER_F[f_index];
- double extra_factor = fputil::fma(m, LOG_2, LOG_F[f_index]);
+ double extra_factor = fputil::multiply_add(m, LOG_2, LOG_F[f_index]);
double r = fputil::polyeval(d, extra_factor, 0x1.fffffffffffacp-1,
-0x1.fffffffef9cb2p-2, 0x1.5555513bc679ap-2,
// > fpminimax(log(1 + x)/x, 5, [|D...|], [-2^-8; 2^-8]);
r = fputil::polyeval(xd, -0x1p-1, 0x1.5555555515551p-2, -0x1.ffffffff82bdap-3,
0x1.999b33348d3aep-3, -0x1.5556cae3adcc3p-3);
- return static_cast<float>(fputil::fma(r, xd * xd, xd));
+ return static_cast<float>(fputil::multiply_add(r, xd * xd, xd));
}
} // namespace __llvm_libc
d *= ONE_OVER_F[f_index];
double extra_factor =
- fputil::fma(static_cast<double>(m), LOG_2, LOG_F[f_index]);
+ fputil::multiply_add(static_cast<double>(m), LOG_2, LOG_F[f_index]);
double r = __llvm_libc::fputil::polyeval(
d, extra_factor, 0x1.fffffffffffacp-1, -0x1.fffffffef9cb2p-2,
],
)
+# FMA headers that are part of the library on every platform.
+fma_common_hdrs = [
+ "src/__support/FPUtil/FMA.h",
+ "src/__support/FPUtil/generic/FMA.h",
+]
+
+# Full header list: the common headers, plus the architecture-specific FMA.h
+# when building for x86-64 or ARM64.
+fma_hdrs = selects.with_or({
+ "//conditions:default": fma_common_hdrs,
+ PLATFORM_CPU_X86_64: fma_common_hdrs + [
+ "src/__support/FPUtil/x86_64/FMA.h",
+ ],
+ PLATFORM_CPU_ARM64: fma_common_hdrs + [
+ "src/__support/FPUtil/aarch64/FMA.h",
+ ],
+})
+
+# FMA header library; its header set is chosen per platform by fma_hdrs.
+cc_library(
+ name = "__support_fputil_fma",
+ hdrs = fma_hdrs,
+ deps = [
+ ":__support_common",
+ ":__support_cpp_bit",
+ ":__support_cpp_type_traits",
+ ":__support_fputil",
+ ":libc_root",
+ ],
+)
+
+# multiply_add.h wrapper library, layered on the FMA library above.
+cc_library(
+ name = "__support_fputil_multiply_add",
+ hdrs = [
+ "src/__support/FPUtil/multiply_add.h",
+ ],
+ deps = [
+ ":__support_common",
+ ":__support_fputil_fma",
+ ],
+)
+
+# PolyEval.h library; depends only on the multiply_add library.
+cc_library(
+ name = "__support_fputil_polyeval",
+ hdrs = [
+ "src/__support/FPUtil/PolyEval.h",
+ ],
+ deps = [
+ ":__support_fputil_multiply_add",
+ ],
+)
+
+
################################ fenv targets ################################
libc_function(