From: peter klausler Date: Tue, 20 Aug 2019 23:38:48 +0000 (-0700) Subject: [flang] Better unsigned division by constants X-Git-Tag: llvmorg-12-init~9537^2~710 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=28d55be08bd66ca52d3df1867fa3f5fb289da4f2;p=platform%2Fupstream%2Fllvm.git [flang] Better unsigned division by constants Original-commit: flang-compiler/f18@1b35c24a8aa37f06b7d5ce8b1af636bf15ac979b Reviewed-on: https://github.com/flang-compiler/f18/pull/671 Tree-same-pre-rewrite: false --- diff --git a/flang/lib/common/bit-population-count.h b/flang/lib/common/bit-population-count.h index d7a254f..1a22138 100644 --- a/flang/lib/common/bit-population-count.h +++ b/flang/lib/common/bit-population-count.h @@ -86,7 +86,7 @@ template inline constexpr int TrailingZeroBitCount(UINT x) { if ((x & 1) != 0) { return 0; // fast path for odd values } else { - return BitPopulationCount(x ^ (x - 1)) - !!x; + return BitPopulationCount(static_cast(x ^ (x - 1))) - !!x; } } } diff --git a/flang/lib/common/unsigned-const-division.h b/flang/lib/common/unsigned-const-division.h new file mode 100644 index 0000000..31d288c --- /dev/null +++ b/flang/lib/common/unsigned-const-division.h @@ -0,0 +1,77 @@ +// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef FORTRAN_COMMON_UNSIGNED_CONST_DIVISION_H_ +#define FORTRAN_COMMON_UNSIGNED_CONST_DIVISION_H_ + +// Work around unoptimized implementations of unsigned integer division +// by constant values in some compilers (looking at YOU, clang 7!) by +// explicitly implementing integer division by constant divisors as +// multiplication by a fixed-point reciprocal and a right shift. + +#include "bit-population-count.h" +#include "leading-zero-bit-count.h" +#include +#include + +namespace Fortran::common { + +template class FixedPointReciprocal { +public: + using type = UINT; + +private: + static_assert(std::is_unsigned_v); + static const int bits{static_cast(8 * sizeof(type))}; + static_assert(bits <= 64); + using Big = std::conditional_t<(bits <= 32), std::uint64_t, __uint128_t>; + +public: + static constexpr FixedPointReciprocal For(type n) { + if (n == 0) { + return {0, 0}; + } else if ((n & (n - 1)) == 0) { // n is a power of two + return {TrailingZeroBitCount(n), 1}; + } else { + int shift{bits - 1 + BitsNeededFor(n)}; + return {shift, static_cast(((Big{1} << shift) + n - 1) / n)}; + } + } + + constexpr type Divide(type n) const { + return (static_cast(reciprocal_) * n) >> shift_; + } + +private: + constexpr FixedPointReciprocal(int s, type r) : shift_{s}, reciprocal_{r} {} + + int shift_; + type reciprocal_; +}; + +static_assert(FixedPointReciprocal::For(5).Divide(2000000000u) == + 400000000u); +static_assert(FixedPointReciprocal::For(10).Divide( + 10000000000000000u) == 1000000000000000u); + +template +inline constexpr UINT DivideUnsignedBy(UINT n) { + if constexpr (!std::is_same_v) { + return FixedPointReciprocal::For(DENOM).Divide(n); + } else { + return n / DENOM; + } +} +} +#endif diff --git a/flang/lib/decimal/big-radix-floating-point.h b/flang/lib/decimal/big-radix-floating-point.h index 6884964..4d64039 100644 --- a/flang/lib/decimal/big-radix-floating-point.h +++ b/flang/lib/decimal/big-radix-floating-point.h @@ -27,9 +27,9 @@ #include 
"binary-floating-point.h" #include "decimal.h" -#include "int-divide-workaround.h" #include "../common/bit-population-count.h" #include "../common/leading-zero-bit-count.h" +#include "../common/unsigned-const-division.h" #include #include #include @@ -134,7 +134,7 @@ private: std::is_same_v || std::is_unsigned_v); SetToZero(); while (n != 0) { - auto q{FastDivision(n)}; + auto q{common::DivideUnsignedBy(n)}; if (n != 10 * q) { break; } @@ -148,7 +148,7 @@ private: return 0; } else { while (n != 0 && digits_ < digitLimit_) { - auto q{FastDivision(n)}; + auto q{common::DivideUnsignedBy(n)}; digit_[digits_++] = n - radix * q; n = q; } @@ -196,7 +196,7 @@ private: Digit remainder{0}; for (int j{digits_ - 1}; j >= 0; --j) { // N.B. Because DIVISOR is a constant, these operations should be cheap. - Digit q{FastDivision(digit_[j])}; + Digit q{common::DivideUnsignedBy(digit_[j])}; Digit nrem{digit_[j] - DIVISOR * q}; digit_[j] = q + (radix / DIVISOR) * remainder; remainder = nrem; @@ -246,7 +246,7 @@ private: template int MultiplyByHelper(int carry = 0) { for (int j{0}; j < digits_; ++j) { auto v{N * digit_[j] + carry}; - carry = FastDivision(v); + carry = common::DivideUnsignedBy(v); digit_[j] = v - carry * radix; // i.e., v % radix } return carry; diff --git a/flang/lib/decimal/binary-to-decimal.cc b/flang/lib/decimal/binary-to-decimal.cc index cec8ba2..4e9909b6d 100644 --- a/flang/lib/decimal/binary-to-decimal.cc +++ b/flang/lib/decimal/binary-to-decimal.cc @@ -139,7 +139,7 @@ BigRadixFloatingPointNumber::ConvertToDecimal(char *buffer, // Treat the MSD specially: don't emit leading zeroes. 
Digit dig{digit_[digits_ - 1]}; for (int k{0}; k < LOG10RADIX; k += 2) { - Digit d{FastDivision(dig)}; + Digit d{common::DivideUnsignedBy(dig)}; dig = 100 * (dig - d * hundredth); const char *q{lut + 2 * d}; if (q[0] != '0' || p > start) { @@ -152,7 +152,7 @@ BigRadixFloatingPointNumber::ConvertToDecimal(char *buffer, for (int j{digits_ - 1}; j-- > 0;) { Digit dig{digit_[j]}; for (int k{0}; k < log10Radix; k += 2) { - Digit d{FastDivision(dig)}; + Digit d{common::DivideUnsignedBy(dig)}; dig = 100 * (dig - d * hundredth); const char *q = lut + 2 * d; *p++ = q[0]; @@ -276,9 +276,9 @@ void BigRadixFloatingPointNumber::Minimize( Digit least{less.digit_[offset]}; Digit my{digit_[0]}; while (true) { - Digit q{FastDivision(my)}; + Digit q{common::DivideUnsignedBy(my)}; Digit r{my - 10 * q}; - Digit lq{FastDivision(least)}; + Digit lq{common::DivideUnsignedBy(least)}; Digit lr{least - 10 * lq}; if (r != 0 && lq == q) { Digit sub{(r - lr) >> 1}; diff --git a/flang/lib/decimal/int-divide-workaround.h b/flang/lib/decimal/int-divide-workaround.h deleted file mode 100644 index 21d2cb6..0000000 --- a/flang/lib/decimal/int-divide-workaround.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef INT_DIVIDE_H_ -#define INT_DIVIDE_H_ - -// Work around unoptimized implementations of unsigned integer division -// by constant values in some compilers (looking at YOU, clang 7!) - -#ifdef __clang__ -#if __clang_major__ < 8 -#define USE_INT_DIVIDE_WORKAROUNDS 1 -#endif -#endif - -#include - -namespace Fortran::decimal { - -template inline constexpr UINT FastDivision(UINT n) { - return n / DENOM; -} - -#if USE_INT_DIVIDE_WORKAROUNDS -template<> -inline constexpr std::uint64_t FastDivision( - std::uint64_t n) { - return (__uint128_t{0x39a5652fb1137857} * n) >> (64 + 51); -} - -template<> -inline constexpr std::uint64_t FastDivision( - std::uint64_t n) { - return (__uint128_t{0xb424dc35095cd81} * n) >> (64 + 42); -} - -template<> -inline constexpr std::uint32_t FastDivision( - std::uint32_t n) { - return (std::uint64_t{0x431bde83} * n) >> (32 + 18); -} - -template<> -inline constexpr std::uint32_t FastDivision( - std::uint32_t n) { - return (std::uint64_t{0xd1b71759} * n) >> (32 + 13); -} - -template<> -inline constexpr std::uint64_t FastDivision( - std::uint64_t n) { - return (__uint128_t{0xcccccccccccccccd} * n) >> (64 + 3); -} - -template<> -inline constexpr std::uint32_t FastDivision( - std::uint32_t n) { - return (std::uint64_t{0xcccccccd} * n) >> (32 + 3); -} - -template<> -inline constexpr std::uint64_t FastDivision( - std::uint64_t n) { - return (__uint128_t{0xcccccccccccccccd} * n) >> (64 + 2); -} - -template<> -inline constexpr std::uint32_t FastDivision( - std::uint32_t n) { - return (std::uint64_t{0xcccccccd} * n) >> (32 + 2); -} -#endif - -static_assert( - FastDivision(9999999999999999u) == 0); -static_assert( - FastDivision(10000000000000000u) == 1); -static_assert( - FastDivision(99999999999999u) == 0); -static_assert( - FastDivision(100000000000000u) == 1); -static_assert(FastDivision(999999u) == 0); -static_assert(FastDivision(1000000u) == 1); -static_assert(FastDivision(18446744073709551615u) == - 1844674407370955161u); 
-static_assert(FastDivision(4294967295u) == 429496729u); -static_assert(FastDivision(18446744073709551615u) == - 3689348814741910323u); -static_assert(FastDivision(4294967295u) == 858993459u); -} -#endif diff --git a/flang/test/decimal/thorough-test.cc b/flang/test/decimal/thorough-test.cc index 5d4644b..17d4d1f 100644 --- a/flang/test/decimal/thorough-test.cc +++ b/flang/test/decimal/thorough-test.cc @@ -18,9 +18,9 @@ #include #include -static constexpr int incr{1}; // steps through all values -static constexpr bool doNegative{true}; -static constexpr bool doMinimize{true}; +static constexpr int incr{10}; // steps through all values +static constexpr bool doNegative{}; // true}; +static constexpr bool doMinimize{}; // true}; using namespace Fortran::decimal;