From 6168b422258e41087a5393b7b77a7349a8922a68 Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Thu, 24 Mar 2022 18:07:46 -0400 Subject: [PATCH] [libc] Improve the performance of expf. Reduce the polynomial's degree from 7 down to 4. Currently we use a degree-7 minimax polynomial on an interval of length 2^-7 around 0 to compute `expf`. Based on the suggestion of @santoshn and the RLIBM project (https://github.com/rutgers-apl/rlibm-all/blob/main/source/float/exp.c) and the improvement we made with `exp2f` in https://reviews.llvm.org/D122346, it is possible to have a good polynomial of degree-4 on a subinterval of length 2^(-7) to approximate e^x. We did try to either reduce the degree of the polynomial down to 3 or increase the interval size to 2^(-6), but in both cases the number of exceptional values exploded. So we settle with using a degree-4 polynomial of the interval of size 2^(-7) around 0. Reviewed By: sivachandra, zimmermann6, santoshn Differential Revision: https://reviews.llvm.org/D122418 --- libc/src/math/generic/expf.cpp | 91 ++++++++++++++++------------- libc/test/src/math/exhaustive/expf_test.cpp | 4 +- libc/test/src/math/expf_test.cpp | 5 ++ 3 files changed, 57 insertions(+), 43 deletions(-) diff --git a/libc/src/math/generic/expf.cpp b/libc/src/math/generic/expf.cpp index 270e22d..4f27e73 100644 --- a/libc/src/math/generic/expf.cpp +++ b/libc/src/math/generic/expf.cpp @@ -24,35 +24,48 @@ LLVM_LIBC_FUNCTION(float, expf, (float x)) { using FPBits = typename fputil::FPBits; FPBits xbits(x); - // When x < log(2^-150) or nan - if (unlikely(xbits.uintval() >= 0xc2cf'f1b5U)) { - // exp(-Inf) = 0 - if (xbits.is_inf()) - return 0.0f; - // exp(nan) = nan - if (xbits.is_nan()) - return x; - if (fputil::get_round() == FE_UPWARD) - return static_cast(FPBits(FPBits::MIN_SUBNORMAL)); - errno = ERANGE; - return 0.0f; + uint32_t x_u = xbits.uintval(); + uint32_t x_abs = x_u & 0x7fff'ffffU; + + // Exceptional values + if (unlikely(x_u == 0xc236'bd8cU)) { // x = -0x1.6d7b18p+5f + return 0x1.108a58p-66f - x * 0x1.0p-95f; } - // x >= 89 or nan - if (unlikely(!xbits.get_sign() && (xbits.uintval() >= 0x42b2'0000))) { - if (xbits.uintval() < 0x7f80'0000U) { - int rounding = fputil::get_round(); - if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) - return static_cast(FPBits(FPBits::MAX_NORMAL)); + // When |x| >= 89, |x| < 2^-25, or x is nan + if (unlikely(x_abs >= 0x42b2'0000U || x_abs <= 0x3280'0000U)) { + // |x| < 2^-25 + if (xbits.get_unbiased_exponent() <= 101) { + return 1.0f + x; + } + + // When x < log(2^-150) or nan + if (xbits.uintval() >= 0xc2cf'f1b5U) { + // exp(-Inf) = 0 + if (xbits.is_inf()) + return 0.0f; + // exp(nan) = nan + if (xbits.is_nan()) + return x; + if (fputil::get_round() == FE_UPWARD) + return static_cast(FPBits(FPBits::MIN_SUBNORMAL)); errno = ERANGE; + return 0.0f; } - return x + static_cast(FPBits::inf()); - } - // |x| < 2^-25 - if (unlikely(xbits.get_unbiased_exponent() <= 101)) { - return 1.0f + x; - } + // x >= 89 or nan + if (!xbits.get_sign() && (xbits.uintval() >= 0x42b2'0000)) { + // x is finite + if (xbits.uintval() < 0x7f80'0000U) { + int rounding = fputil::get_round(); + if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) + return static_cast(FPBits(FPBits::MAX_NORMAL)); + errno = ERANGE; + } + // x is +inf or nan + return x + static_cast(FPBits::inf()); + } + } // For -104 < x < 89, to compute exp(x), we perform the following range // reduction: find hi, mid, lo such that: // x = hi + mid + lo, in which @@ -64,36 +77,30 @@ LLVM_LIBC_FUNCTION(float, expf, (float x)) { // Then, // exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo). // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2 - // respectively. exp(lo) is computed using a degree-7 minimax polynomial + // respectively. exp(lo) is computed using a degree-4 minimax polynomial // generated by Sollya. - // x_hi = hi + mid. - int x_hi = static_cast(x * 0x1.0p7f); + // x_hi = (hi + mid) * 2^7 = round(x * 2^7). + // The default rounding mode for float-to-int conversion in C++ is + // round-toward-zero. To make it round-to-nearest, we add (-1)^sign(x) * 0.5 + // before conversion. + int x_hi = static_cast(x * 0x1.0p7f + (xbits.get_sign() ? -0.5f : 0.5f)); // Subtract (hi + mid) from x to get lo. x -= static_cast(x_hi) * 0x1.0p-7f; double xd = static_cast(x); - // Make sure that -2^(-8) <= lo < 2^-8. - if (x >= 0x1.0p-8f) { - ++x_hi; - xd -= 0x1.0p-7; - } - if (x < -0x1.0p-8f) { - --x_hi; - xd += 0x1.0p-7; - } x_hi += 104 << 7; // hi = x_hi >> 7 double exp_hi = EXP_M1[x_hi >> 7]; - // lo = x_hi & 0x0000'007fU; + // mid * 2^7 = x_hi & 0x0000'007fU; double exp_mid = EXP_M2[x_hi & 0x7f]; - // Degree-7 minimax polynomial generated by Sollya with the following + // Degree-4 minimax polynomial generated by Sollya with the following // commands: // > display = hexadecimal; - // > Q = fpminimax(expm1(x)/x, 6, [|D...|], [-2^-8, 2^-8]); + // > Q = fpminimax(expm1(x)/x, 3, [|D...|], [-2^-8, 2^-8]); // > Q; - double exp_lo = fputil::polyeval( - xd, 0x1p0, 0x1p0, 0x1p-1, 0x1.5555555555555p-3, 0x1.55555555553ap-5, - 0x1.1111111204dfcp-7, 0x1.6c16cb2da593ap-10, 0x1.9ff1648996d2ep-13); + double exp_lo = + fputil::polyeval(xd, 0x1p0, 0x1.ffffffffff777p-1, 0x1.000000000071cp-1, + 0x1.555566668e5e7p-3, 0x1.55555555ef243p-5); return static_cast(exp_hi * exp_mid * exp_lo); } diff --git a/libc/test/src/math/exhaustive/expf_test.cpp b/libc/test/src/math/exhaustive/expf_test.cpp index e5d3924..4546ac3 100644 --- a/libc/test/src/math/exhaustive/expf_test.cpp +++ b/libc/test/src/math/exhaustive/expf_test.cpp @@ -12,6 +12,8 @@ #include "utils/MPFRWrapper/MPFRUtils.h" #include "utils/UnitTest/FPMatcher.h" +#include + using FPBits = __llvm_libc::fputil::FPBits; namespace mpfr = __llvm_libc::testing::mpfr; @@ -32,7 +34,7 @@ struct LlvmLibcExpfExhaustiveTest : public LlvmLibcExhaustiveTest { } }; -static constexpr int NUM_THREADS = 16; +static const int NUM_THREADS = std::thread::hardware_concurrency(); // Range: [0, 89]; static constexpr uint32_t POS_START = 0x0000'0000U; diff --git a/libc/test/src/math/expf_test.cpp b/libc/test/src/math/expf_test.cpp index 3b8bbe2..05f267e 100644 --- a/libc/test/src/math/expf_test.cpp +++ b/libc/test/src/math/expf_test.cpp @@ -92,6 +92,11 @@ TEST(LlvmLibcExpfTest, Borderline) { ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp, x, __llvm_libc::expf(x), 0.5); EXPECT_MATH_ERRNO(0); + + x = float(FPBits(0xc236bd8cU)); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp, x, __llvm_libc::expf(x), + 0.5); + EXPECT_MATH_ERRNO(0); } TEST(LlvmLibcExpfTest, InFloatRange) { -- 2.7.4