From: Tue Ly Date: Sun, 8 May 2022 22:27:34 +0000 (-0400) Subject: [libc] Implement double precision FMA for targets without FMA instructions. X-Git-Tag: upstream/15.0.7~5129 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ee8992770754d186c9217d35cccd339efc55a36b;p=platform%2Fupstream%2Fllvm.git [libc] Implement double precision FMA for targets without FMA instructions. Implement double precision FMA (Fused Multiply-Add) for targets without FMA instructions using __uint128_t to store the intermediate results. Reviewed By: michaelrj, sivachandra Differential Revision: https://reviews.llvm.org/D124495 --- diff --git a/libc/src/__support/FPUtil/FMA.h b/libc/src/__support/FPUtil/FMA.h index 6823dd0..a3c1729 100644 --- a/libc/src/__support/FPUtil/FMA.h +++ b/libc/src/__support/FPUtil/FMA.h @@ -27,11 +27,7 @@ namespace __llvm_libc { namespace fputil { -// We have a generic implementation available only for single precision fma as -// we restrict it to float values for now. -template -static inline cpp::EnableIfType::Value, T> fma(T x, T y, - T z) { +template static inline T fma(T x, T y, T z) { return generic::fma(x, y, z); } diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h index 78b640c..3dad2ad 100644 --- a/libc/src/__support/FPUtil/generic/FMA.h +++ b/libc/src/__support/FPUtil/generic/FMA.h @@ -9,16 +9,24 @@ #ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMA_H #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMA_H +#include "src/__support/CPP/Bit.h" #include "src/__support/CPP/TypeTraits.h" +#include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/FloatProperties.h" +#include "src/__support/FPUtil/builtin_wrappers.h" +#include "src/__support/common.h" namespace __llvm_libc { namespace fputil { namespace generic { -template -static inline cpp::EnableIfType::Value, T> fma(T x, T y, - T z) { +template static inline T fma(T x, T y, T z); + +// TODO(lntue): Implement fmaf that is correctly rounded to all rounding modes. +// The implementation below only is only correct for the default rounding mode, +// round-to-nearest tie-to-even. +template <> inline float fma(float x, float y, float z) { // Product is exact. double prod = static_cast(x) * static_cast(y); double z_d = static_cast(z); @@ -66,6 +74,215 @@ static inline cpp::EnableIfType::Value, T> fma(T x, T y, return static_cast(static_cast(bit_sum)); } +namespace internal { + +// Extract the sticky bits and shift the `mantissa` to the right by +// `shift_length`. +static inline bool shift_mantissa(int shift_length, __uint128_t &mant) { + if (shift_length >= 128) { + mant = 0; + return true; // prod_mant is non-zero. + } + __uint128_t mask = (__uint128_t(1) << shift_length) - 1; + bool sticky_bits = (mant & mask) != 0; + mant >>= shift_length; + return sticky_bits; +} + +} // namespace internal + +template <> inline double fma(double x, double y, double z) { + using FPBits = fputil::FPBits; + using FloatProp = fputil::FloatProperties; + + if (unlikely(x == 0 || y == 0 || z == 0)) { + return x * y + z; + } + + int x_exp = 0; + int y_exp = 0; + int z_exp = 0; + + // Normalize denormal inputs. + if (unlikely(FPBits(x).get_unbiased_exponent() == 0)) { + x_exp -= 52; + x *= 0x1.0p+52; + } + if (unlikely(FPBits(y).get_unbiased_exponent() == 0)) { + y_exp -= 52; + y *= 0x1.0p+52; + } + if (unlikely(FPBits(z).get_unbiased_exponent() == 0)) { + z_exp -= 52; + z *= 0x1.0p+52; + } + + FPBits x_bits(x), y_bits(y), z_bits(z); + bool x_sign = x_bits.get_sign(); + bool y_sign = y_bits.get_sign(); + bool z_sign = z_bits.get_sign(); + bool prod_sign = x_sign != y_sign; + x_exp += x_bits.get_unbiased_exponent(); + y_exp += y_bits.get_unbiased_exponent(); + z_exp += z_bits.get_unbiased_exponent(); + + if (unlikely(x_exp == FPBits::MAX_EXPONENT || y_exp == FPBits::MAX_EXPONENT || + z_exp == FPBits::MAX_EXPONENT)) + return x * y + z; + + // Extract mantissa and append hidden leading bits. + __uint128_t x_mant = x_bits.get_mantissa() | FPBits::MIN_NORMAL; + __uint128_t y_mant = y_bits.get_mantissa() | FPBits::MIN_NORMAL; + __uint128_t z_mant = z_bits.get_mantissa() | FPBits::MIN_NORMAL; + + // If the exponent of the product x*y > the exponent of z, then no extra + // precision beside the entire product x*y is needed. On the other hand, when + // the exponent of z >= the exponent of the product x*y, the worst-case that + // we need extra precision is when there is cancellation and the most + // significant bit of the product is aligned exactly with the second most + // significant bit of z: + // z : 10aa...a + // - prod : 1bb...bb....b + // In that case, in order to store the exact result, we need at least + // (Length of prod) - (MantissaLength of z) = 2*(52 + 1) - 52 = 54. + // Overall, before aligning the mantissas and exponents, we can simply left- + // shift the mantissa of z by at least 54, and left-shift the product of x*y + // by (that amount - 52). After that, it is enough to align the least + // significant bit, given that we keep track of the round and sticky bits + // after the least significant bit. + // We pick shifting z_mant by 64 bits so that technically we can simply use + // the original mantissa as high part when constructing 128-bit z_mant. So the + // mantissa of prod will be left-shifted by 64 - 54 = 10 initially. + + __uint128_t prod_mant = x_mant * y_mant << 10; + int prod_lsb_exp = + x_exp + y_exp - + (FPBits::EXPONENT_BIAS + 2 * MantissaWidth::VALUE + 10); + + z_mant <<= 64; + int z_lsb_exp = z_exp - (MantissaWidth::VALUE + 64); + bool round_bit = false; + bool sticky_bits = false; + bool z_shifted = false; + + // Align exponents. + if (prod_lsb_exp < z_lsb_exp) { + sticky_bits = internal::shift_mantissa(z_lsb_exp - prod_lsb_exp, prod_mant); + prod_lsb_exp = z_lsb_exp; + } else if (z_lsb_exp < prod_lsb_exp) { + z_shifted = true; + sticky_bits = internal::shift_mantissa(prod_lsb_exp - z_lsb_exp, z_mant); + } + + // Perform the addition: + // (-1)^prod_sign * prod_mant + (-1)^z_sign * z_mant. + // The final result will be stored in prod_sign and prod_mant. + if (prod_sign == z_sign) { + // Effectively an addition. + prod_mant += z_mant; + } else { + // Subtraction cases. + if (prod_mant >= z_mant) { + if (z_shifted && sticky_bits) { + // Add 1 more to the subtrahend so that the sticky bits remain + // positive. This would simplify the rounding logic. + ++z_mant; + } + prod_mant -= z_mant; + } else { + if (!z_shifted && sticky_bits) { + // Add 1 more to the subtrahend so that the sticky bits remain + // positive. This would simplify the rounding logic. + ++prod_mant; + } + prod_mant = z_mant - prod_mant; + prod_sign = z_sign; + } + } + + uint64_t result = 0; + int r_exp = 0; // Unbiased exponent of the result + + // Normalize the result. + if (prod_mant != 0) { + uint64_t prod_hi = static_cast(prod_mant >> 64); + int lead_zeros = + prod_hi ? clz(prod_hi) : 64 + clz(static_cast(prod_mant)); + // Move the leading 1 to the most significant bit. + prod_mant <<= lead_zeros; + // The lower 64 bits are always sticky bits after moving the leading 1 to + // the most significant bit. + sticky_bits |= (static_cast(prod_mant) != 0); + result = static_cast(prod_mant >> 64); + // Change prod_lsb_exp the be the exponent of the least significant bit of + // the result. + prod_lsb_exp += 64 - lead_zeros; + r_exp = prod_lsb_exp + 63; + + if (r_exp > 0) { + // The result is normal. We will shift the mantissa to the right by + // 63 - 52 = 11 bits (from the locations of the most significant bit). + // Then the rounding bit will correspond the the 11th bit, and the lowest + // 10 bits are merged into sticky bits. + round_bit = (result & 0x0400ULL) != 0; + sticky_bits |= (result & 0x03ffULL) != 0; + result >>= 11; + } else { + if (r_exp < -52) { + // The result is smaller than 1/2 of the smallest denormal number. + sticky_bits = true; // since the result is non-zero. + result = 0; + } else { + // The result is denormal. + uint64_t mask = 1ULL << (11 - r_exp); + round_bit = (result & mask) != 0; + sticky_bits |= (result & (mask - 1)) != 0; + if (r_exp > -52) + result >>= 12 - r_exp; + else + result = 0; + } + + r_exp = 0; + } + } else { + // Return +0.0 when there is exact cancellation, i.e., x*y == -z exactly. + prod_sign = false; + } + + // Finalize the result. + int round_mode = fputil::get_round(); + if (unlikely(r_exp >= FPBits::MAX_EXPONENT)) { + if ((round_mode == FE_TOWARDZERO) || + (round_mode == FE_UPWARD && prod_sign) || + (round_mode == FE_DOWNWARD && !prod_sign)) { + result = FPBits::MAX_NORMAL; + return prod_sign ? -bit_cast(result) : bit_cast(result); + } + return prod_sign ? static_cast(FPBits::neg_inf()) + : static_cast(FPBits::inf()); + } + + // Remove hidden bit and append the exponent field and sign bit. + result = (result & FloatProp::MANTISSA_MASK) | + (static_cast(r_exp) << FloatProp::MANTISSA_WIDTH); + if (prod_sign) { + result |= FloatProp::SIGN_MASK; + } + + // Rounding. + if (round_mode == FE_TONEAREST) { + if (round_bit && (sticky_bits || ((result & 1) != 0))) + ++result; + } else if ((round_mode == FE_UPWARD && !prod_sign) || + (round_mode == FE_DOWNWARD && prod_sign)) { + if (round_bit || sticky_bits) + ++result; + } + + return bit_cast(result); +} + } // namespace generic } // namespace fputil } // namespace __llvm_libc diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index e33022c..60ccfb2 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -64,8 +64,6 @@ add_entrypoint_object( libc.src.__support.FPUtil.fma COMPILE_OPTIONS -O3 - FLAGS - FMA_OPT__ONLY ) add_math_entrypoint_object(ceil) diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 68a9aed..ddc02b2 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1151,6 +1151,8 @@ add_fp_unittest( libc.src.__support.FPUtil.fputil ) +# TODO(lntue): The current implementation of fputil::general::fma is only +# correctly rounded for the default rounding mode round-to-nearest tie-to-even. add_fp_unittest( fmaf_test NEED_MPFR @@ -1162,6 +1164,8 @@ add_fp_unittest( libc.include.math libc.src.math.fmaf libc.src.__support.FPUtil.fputil + FLAGS + FMA_OPT__ONLY ) add_fp_unittest( diff --git a/libc/test/src/math/FmaTest.h b/libc/test/src/math/FmaTest.h index 2d04989..959c4e8 100644 --- a/libc/test/src/math/FmaTest.h +++ b/libc/test/src/math/FmaTest.h @@ -61,6 +61,9 @@ public: // Test overflow. T z = T(FPBits(FPBits::MAX_NORMAL)); EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z); + // Exact cancellation. + EXPECT_FP_EQ(func(T(3.0), T(5.0), -T(15.0)), T(0.0)); + EXPECT_FP_EQ(func(T(-3.0), T(5.0), T(15.0)), T(0.0)); } void test_subnormal_range(Func func) { @@ -72,9 +75,9 @@ public: v += STEP, w -= STEP) { T x = T(FPBits(get_random_bit_pattern())), y = T(FPBits(v)), z = T(FPBits(w)); - T result = func(x, y, z); mpfr::TernaryInput input{x, y, z}; - ASSERT_MPFR_MATCH(mpfr::Operation::Fma, input, result, 0.5); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, func(x, y, z), + 0.5); } } @@ -86,9 +89,9 @@ public: v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)), z = T(FPBits(get_random_bit_pattern())); - T result = func(x, y, z); mpfr::TernaryInput input{x, y, z}; - ASSERT_MPFR_MATCH(mpfr::Operation::Fma, input, result, 0.5); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, func(x, y, z), + 0.5); } } }; diff --git a/libc/test/src/math/fma_test.cpp b/libc/test/src/math/fma_test.cpp index a9388cc..be6b138 100644 --- a/libc/test/src/math/fma_test.cpp +++ b/libc/test/src/math/fma_test.cpp @@ -10,7 +10,271 @@ #include "src/math/fma.h" -using LlvmLibcFmaTest = FmaTestTemplate; +struct Inputs { + double a, b, c; +}; + +struct LlvmLibcFmaTest : public FmaTestTemplate { + void test_more_values() { + constexpr int N = 236; + constexpr Inputs INPUTS[N] = { + {0x1p+0, 0x2p+0, 0x3p+0}, + {0x1.4p+0, 0xcp-4, 0x1p-4}, + {0x0p+0, 0x0p+0, 0x0p+0}, + {0x1p+0, 0x0p+0, 0x0p+0}, + {0x0p+0, 0x1p+0, 0x0p+0}, + {0x1p+0, 0x1p+0, 0x1p+0}, + {0x0p+0, 0x0p+0, 0x1p+0}, + {0x0p+0, 0x0p+0, 0x2p+0}, + {0x0p+0, 0x0p+0, 0xf.fffffp+124}, + {0x0p+0, 0x0p+0, 0xf.ffffffffffff8p+1020}, + {0x0p+0, 0x1p+0, 0x1p+0}, + {0x1p+0, 0x0p+0, 0x1p+0}, + {0x0p+0, 0x1p+0, 0x2p+0}, + {0x1p+0, 0x0p+0, 0x2p+0}, + {0x0p+0, 0x1p+0, 0xf.fffffp+124}, + {0x0p+0, 0x1p+0, 0xf.ffffffffffff8p+1020}, + {0x1p+0, 0x0p+0, 0xf.fffffp+124}, + {0x1p+0, 0x0p+0, 0xf.ffffffffffff8p+1020}, + {0x4p-128, 0x4p-128, 0x0p+0}, + {0x4p-128, 0x4p-1024, 0x0p+0}, + {0x4p-128, 0x8p-972, 0x0p+0}, + {0x4p-1024, 0x4p-128, 0x0p+0}, + {0x4p-1024, 0x4p-1024, 0x0p+0}, + {0x4p-1024, 0x8p-972, 0x0p+0}, + {0x8p-972, 0x4p-128, 0x0p+0}, + {0x8p-972, 0x4p-1024, 0x0p+0}, + {0x8p-972, 0x8p-972, 0x0p+0}, + {0x4p-128, 0x4p-128, 0x0p+0}, + {0x4p-128, 0x4p-1024, 0x0p+0}, + {0x4p-128, 0x8p-972, 0x0p+0}, + {0x4p-1024, 0x4p-128, 0x0p+0}, + {0x4p-1024, 0x4p-1024, 0x0p+0}, + {0x4p-1024, 0x8p-972, 0x0p+0}, + {0x8p-972, 0x4p-128, 0x0p+0}, + {0x8p-972, 0x4p-1024, 0x0p+0}, + {0x8p-972, 0x8p-972, 0x0p+0}, + {0x4p-128, 0x4p-128, 0x0p+0}, + {0x4p-128, 0x4p-1024, 0x0p+0}, + {0x4p-128, 0x8p-972, 0x0p+0}, + {0x4p-1024, 0x4p-128, 0x0p+0}, + {0x4p-1024, 0x4p-1024, 0x0p+0}, + {0x4p-1024, 0x8p-972, 0x0p+0}, + {0x8p-972, 0x4p-128, 0x0p+0}, + {0x8p-972, 0x4p-1024, 0x0p+0}, + {0x8p-972, 0x8p-972, 0x0p+0}, + {0x4p-128, 0x4p-128, 0x0p+0}, + {0x4p-128, 0x4p-1024, 0x0p+0}, + {0x4p-128, 0x8p-972, 0x0p+0}, + {0x4p-1024, 0x4p-128, 0x0p+0}, + {0x4p-1024, 0x4p-1024, 0x0p+0}, + {0x4p-1024, 0x8p-972, 0x0p+0}, + {0x8p-972, 0x4p-128, 0x0p+0}, + {0x8p-972, 0x4p-1024, 0x0p+0}, + {0x8p-972, 0x8p-972, 0x0p+0}, + {0x4p-128, 0x4p-128, 0x0p+0}, + {0x4p-128, 0x4p-1024, 0x0p+0}, + {0x4p-128, 0x8p-972, 0x0p+0}, + {0x4p-1024, 0x4p-128, 0x0p+0}, + {0x4p-1024, 0x4p-1024, 0x0p+0}, + {0x4p-1024, 0x8p-972, 0x0p+0}, + {0x8p-972, 0x4p-128, 0x0p+0}, + {0x8p-972, 0x4p-1024, 0x0p+0}, + {0x8p-972, 0x8p-972, 0x0p+0}, + {0x4p-128, 0x4p-128, 0x0p+0}, + {0x4p-128, 0x4p-1024, 0x0p+0}, + {0x4p-128, 0x8p-972, 0x0p+0}, + {0x4p-1024, 0x4p-128, 0x0p+0}, + {0x4p-1024, 0x4p-1024, 0x0p+0}, + {0x4p-1024, 0x8p-972, 0x0p+0}, + {0x8p-972, 0x4p-128, 0x0p+0}, + {0x8p-972, 0x4p-1024, 0x0p+0}, + {0x8p-972, 0x8p-972, 0x0p+0}, + {0x4p-128, 0x4p-128, 0x0p+0}, + {0x4p-128, 0x4p-1024, 0x0p+0}, + {0x4p-128, 0x8p-972, 0x0p+0}, + {0x4p-1024, 0x4p-128, 0x0p+0}, + {0x4p-1024, 0x4p-1024, 0x0p+0}, + {0x4p-1024, 0x8p-972, 0x0p+0}, + {0x8p-972, 0x4p-128, 0x0p+0}, + {0x8p-972, 0x4p-1024, 0x0p+0}, + {0x8p-972, 0x8p-972, 0x0p+0}, + {0x4p-128, 0x4p-128, 0x0p+0}, + {0x4p-128, 0x4p-1024, 0x0p+0}, + {0x4p-128, 0x8p-972, 0x0p+0}, + {0x4p-1024, 0x4p-128, 0x0p+0}, + {0x4p-1024, 0x4p-1024, 0x0p+0}, + {0x4p-1024, 0x8p-972, 0x0p+0}, + {0x8p-972, 0x4p-128, 0x0p+0}, + {0x8p-972, 0x4p-1024, 0x0p+0}, + {0x8p-972, 0x8p-972, 0x0p+0}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024}, + {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024}, + {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972}, + {0x2.fffp+12, 0x1.000002p+0, 0x1.ffffp-24}, + {0x1.fffp+0, 0x1.00001p+0, 0x1.fffp+0}, + {0xc.d5e6fp+124, 0x2.6af378p-128, 0x1.f08948p+0}, + {0x1.9abcdep+100, 0x2.6af378p-128, 0x3.e1129p-28}, + {0xf.fffffp+124, 0x1.001p+0, 0xf.fffffp+124}, + {0xf.fffffp+124, 0x1.fffffep+0, 0xf.fffffp+124}, + {0xf.fffffp+124, 0x2p+0, 0xf.fffffp+124}, + {0x5p-128, 0x8.00002p-4, 0x1p-128}, + {0x7.ffffep-128, 0x8.00001p-4, 0x8p-152}, + {0x8p-152, 0x8p-4, 0x3.fffff8p-128}, + {0x8p-152, 0x8.8p-4, 0x3.fffff8p-128}, + {0x8p-152, 0x8p-152, 0x8p+124}, + {0x8p-152, 0x8p-152, 0x4p-128}, + {0x8p-152, 0x8p-152, 0x3.fffff8p-128}, + {0x8p-152, 0x8p-152, 0x8p-152}, + {0xf.ffp-4, 0xf.ffp-4, 0xf.fep-4}, + {0x4.000008p-128, 0x4.000008p-28, 0x8p+124}, + {0x4.000008p-128, 0x4.000008p-28, 0x8p+100}, + {0x2.fep+12, 0x1.0000000000001p+0, 0x1.ffep-48}, + {0x1.fffp+0, 0x1.0000000000001p+0, 0x1.fffp+0}, + {0x1.0000002p+0, 0xf.fffffep-4, 0x1p-300}, + {0xe.f56df7797f768p+1020, 0x3.7ab6fbbcbfbb4p-1024, + 0x3.40bf1803497f6p+0}, + {0x1.deadbeef2feedp+900, 0x3.7ab6fbbcbfbb4p-1024, + 0x6.817e300692fecp-124}, + {0xf.ffffffffffff8p+1020, 0x1.001p+0, 0xf.ffffffffffff8p+1020}, + {0xf.ffffffffffff8p+1020, 0x1.fffffffffffffp+0, + 0xf.ffffffffffff8p+1020}, + {0xf.ffffffffffff8p+1020, 0x2p+0, 0xf.ffffffffffff8p+1020}, + {0x5.a827999fcef3p-540, 0x5.a827999fcef3p-540, 0x0p+0}, + {0x3.bd5b7dde5fddap-496, 0x3.bd5b7dde5fddap-496, 0xd.fc352bc352bap-992}, + {0x3.bd5b7dde5fddap-504, 0x3.bd5b7dde5fddap-504, + 0xd.fc352bc352bap-1008}, + {0x8p-540, 0x4p-540, 0x4p-1076}, + {0x1.7fffff8p-968, 0x4p-108, 0x4p-1048}, + {0x2.8000008p-968, 0x4p-108, 0x4p-1048}, + {0x2.8p-968, 0x4p-108, 0x4p-1048}, + {0x2.33956cdae7c2ep-960, 0x3.8e211518bfea2p-108, + 0x2.02c2b59766d9p-1024}, + {0x3.a5d5dadd1d3a6p-980, 0x2.9c0cd8c5593bap-64, 0x2.49179ac00d15p-1024}, + {0x2.2a7aca1773e0cp-908, 0x9.6809186a42038p-128, 0x2.c9e356b3f0fp-1024}, + {0x3.ffffffffffffep-712, 0x3.ffffffffffffep-276, + 0x3.fffffc0000ffep-984}, + {0x5p-1024, 0x8.000000000001p-4, 0x1p-1024}, + {0x7.ffffffffffffp-1024, 0x8.0000000000008p-4, 0x4p-1076}, + {0x4p-1076, 0x8p-4, 0x3.ffffffffffffcp-1024}, + {0x4p-1076, 0x8.8p-4, 0x3.ffffffffffffcp-1024}, + {0x4p-1076, 0x4p-1076, 0x8p+1020}, + {0x4p-1076, 0x4p-1076, 0x4p-1024}, + {0x4p-1076, 0x4p-1076, 0x3.ffffffffffffcp-1024}, + {0x4p-1076, 0x4p-1076, 0x4p-1076}, + {0xf.ffffffffffff8p-4, 0xf.ffffffffffff8p-4, 0xf.ffffffffffffp-4}, + {0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x8p+1020}, + {0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x4p+968}, + {0x7.fffff8p-128, 0x3.fffffcp+24, 0xf.fffffp+124}, + {0x7.ffffffffffffcp-1024, 0x7.ffffffffffffcp+52, + 0xf.ffffffffffff8p+1020}, + }; + + for (int i = 0; i < N; ++i) { + for (int signs = 0; signs < 7; ++signs) { + double a = (signs & 4) ? -INPUTS[i].a : INPUTS[i].a; + double b = (signs & 2) ? -INPUTS[i].b : INPUTS[i].b; + double c = (signs & 1) ? -INPUTS[i].c : INPUTS[i].c; + mpfr::TernaryInput input{a, b, c}; + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, + __llvm_libc::fma(a, b, c), 0.5); + } + } + } +}; TEST_F(LlvmLibcFmaTest, SpecialNumbers) { test_special_numbers(&__llvm_libc::fma); @@ -21,3 +285,5 @@ TEST_F(LlvmLibcFmaTest, SubnormalRange) { } TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&__llvm_libc::fma); } + +TEST_F(LlvmLibcFmaTest, ExtraValues) { test_more_values(); } diff --git a/libc/test/src/math/fmaf_test.cpp b/libc/test/src/math/fmaf_test.cpp index 73d0094..8d1cf25 100644 --- a/libc/test/src/math/fmaf_test.cpp +++ b/libc/test/src/math/fmaf_test.cpp @@ -10,14 +10,14 @@ #include "src/math/fmaf.h" -using LlvmLibcFmaTest = FmaTestTemplate; +using LlvmLibcFmafTest = FmaTestTemplate; -TEST_F(LlvmLibcFmaTest, SpecialNumbers) { +TEST_F(LlvmLibcFmafTest, SpecialNumbers) { test_special_numbers(&__llvm_libc::fmaf); } -TEST_F(LlvmLibcFmaTest, SubnormalRange) { +TEST_F(LlvmLibcFmafTest, SubnormalRange) { test_subnormal_range(&__llvm_libc::fmaf); } -TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&__llvm_libc::fmaf); } +TEST_F(LlvmLibcFmafTest, NormalRange) { test_normal_range(&__llvm_libc::fmaf); }