if ((x & 1) != 0) {
return 0; // fast path for odd values
} else {
- return BitPopulationCount(x ^ (x - 1)) - !!x;
+ return BitPopulationCount(static_cast<UINT>(x ^ (x - 1))) - !!x;
}
}
}
--- /dev/null
+// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef FORTRAN_COMMON_UNSIGNED_CONST_DIVISION_H_
+#define FORTRAN_COMMON_UNSIGNED_CONST_DIVISION_H_
+
+// Work around unoptimized implementations of unsigned integer division
+// by constant values in some compilers (looking at YOU, clang 7!) by
+// explicitly implementing integer division by constant divisors as
+// multiplication by a fixed-point reciprocal and a right shift.
+
+#include "bit-population-count.h"
+#include "leading-zero-bit-count.h"
+#include <cinttypes>
+#include <type_traits>
+
+namespace Fortran::common {
+
+// Precomputed fixed-point reciprocal of an unsigned divisor.  For(d) chooses
+// a shift count and a multiplier so that Divide(n) can evaluate n/d as
+// (multiplier * n) >> shift in a double-width intermediate.  A multiplier
+// that fits in the operand type is not exact over the whole operand range
+// for every divisor (7 is the classic counterexample), so For() checks
+// exactness and otherwise records the divisor, and Divide() then falls back
+// to a true division.
+template<typename UINT> class FixedPointReciprocal {
+public:
+  using type = UINT;
+
+private:
+  static_assert(std::is_unsigned_v<type>);
+  static constexpr int bits{static_cast<int>(8 * sizeof(type))};
+  static_assert(bits <= 64);
+  // Double-width type: holds multiplier * dividend without overflow.
+  using Big = std::conditional_t<(bits <= 32), std::uint64_t, __uint128_t>;
+
+public:
+  // Builds the reciprocal for divisor n.  A zero divisor yields an object
+  // whose Divide() always returns zero.
+  static constexpr FixedPointReciprocal For(type n) {
+    if (n == 0) {
+      return {0, 0, 0};
+    } else if ((n & (n - 1)) == 0) {  // n is a power of two: plain shift
+      return {TrailingZeroBitCount(n), 1, 0};
+    } else {
+      int shift{bits - 1 + BitsNeededFor(n)};
+      Big scale{Big{1} << shift};
+      Big reciprocal{(scale + n - 1) / n};  // ceiling of scale / n
+      // The rounded-up multiplier overshoots scale/n by excess/n.  The
+      // truncated product equals the true quotient for every dividend as
+      // long as excess * dividend stays below scale.
+      Big excess{reciprocal * n - scale};
+      Big largestDividend{static_cast<Big>(static_cast<type>(~type{0}))};
+      bool exact{excess * largestDividend < scale};
+      return {shift, static_cast<type>(reciprocal), exact ? type{0} : n};
+    }
+  }
+
+  // Returns n divided by the divisor that was passed to For().
+  constexpr type Divide(type n) const {
+    if (divisor_ != 0) {
+      // The reciprocal is not exact over the full range; divide for real.
+      return static_cast<type>(n / divisor_);
+    }
+    return static_cast<type>((static_cast<Big>(reciprocal_) * n) >> shift_);
+  }
+
+private:
+  constexpr FixedPointReciprocal(int s, type r, type d)
+    : shift_{s}, reciprocal_{r}, divisor_{d} {}
+
+  int shift_;
+  type reciprocal_;
+  type divisor_;  // nonzero only when Divide() must use a true division
+};
+
+static_assert(FixedPointReciprocal<std::uint32_t>::For(5).Divide(2000000000u) ==
+    400000000u);
+static_assert(FixedPointReciprocal<std::uint64_t>::For(10).Divide(
+                  10000000000000000u) == 1000000000000000u);
+// Divisors whose single-word reciprocal is inexact must still be correct.
+static_assert(
+    FixedPointReciprocal<std::uint32_t>::For(7).Divide(4294967291u) ==
+    613566755u);
+static_assert(FixedPointReciprocal<std::uint64_t>::For(7).Divide(
+                  18446744073709551615u) == 2635249153387078802u);
+
+// Divides n by the compile-time constant DENOM.  128-bit operands use the
+// built-in division operator; every other unsigned type goes through
+// FixedPointReciprocal.
+template<typename UINT, UINT DENOM>
+inline constexpr UINT DivideUnsignedBy(UINT n) {
+  if constexpr (std::is_same_v<UINT, __uint128_t>) {
+    return n / DENOM;
+  } else {
+    return FixedPointReciprocal<UINT>::For(DENOM).Divide(n);
+  }
+}
+}
+#endif
#include "binary-floating-point.h"
#include "decimal.h"
-#include "int-divide-workaround.h"
#include "../common/bit-population-count.h"
#include "../common/leading-zero-bit-count.h"
+#include "../common/unsigned-const-division.h"
#include <cinttypes>
#include <limits>
#include <type_traits>
std::is_same_v<UINT, __uint128_t> || std::is_unsigned_v<UINT>);
SetToZero();
while (n != 0) {
- auto q{FastDivision<UINT, 10>(n)};
+ auto q{common::DivideUnsignedBy<UINT, 10>(n)};
if (n != 10 * q) {
break;
}
return 0;
} else {
while (n != 0 && digits_ < digitLimit_) {
- auto q{FastDivision<UINT, radix>(n)};
+ auto q{common::DivideUnsignedBy<UINT, radix>(n)};
digit_[digits_++] = n - radix * q;
n = q;
}
Digit remainder{0};
for (int j{digits_ - 1}; j >= 0; --j) {
// N.B. Because DIVISOR is a constant, these operations should be cheap.
- Digit q{FastDivision<Digit, DIVISOR>(digit_[j])};
+ Digit q{common::DivideUnsignedBy<Digit, DIVISOR>(digit_[j])};
Digit nrem{digit_[j] - DIVISOR * q};
digit_[j] = q + (radix / DIVISOR) * remainder;
remainder = nrem;
template<int N> int MultiplyByHelper(int carry = 0) {
for (int j{0}; j < digits_; ++j) {
auto v{N * digit_[j] + carry};
- carry = FastDivision<Digit, radix>(v);
+ carry = common::DivideUnsignedBy<Digit, radix>(v);
digit_[j] = v - carry * radix; // i.e., v % radix
}
return carry;
// Treat the MSD specially: don't emit leading zeroes.
Digit dig{digit_[digits_ - 1]};
for (int k{0}; k < LOG10RADIX; k += 2) {
- Digit d{FastDivision<Digit, hundredth>(dig)};
+ Digit d{common::DivideUnsignedBy<Digit, hundredth>(dig)};
dig = 100 * (dig - d * hundredth);
const char *q{lut + 2 * d};
if (q[0] != '0' || p > start) {
for (int j{digits_ - 1}; j-- > 0;) {
Digit dig{digit_[j]};
for (int k{0}; k < log10Radix; k += 2) {
- Digit d{FastDivision<Digit, hundredth>(dig)};
+ Digit d{common::DivideUnsignedBy<Digit, hundredth>(dig)};
dig = 100 * (dig - d * hundredth);
const char *q = lut + 2 * d;
*p++ = q[0];
Digit least{less.digit_[offset]};
Digit my{digit_[0]};
while (true) {
- Digit q{FastDivision<Digit, 10>(my)};
+ Digit q{common::DivideUnsignedBy<Digit, 10>(my)};
Digit r{my - 10 * q};
- Digit lq{FastDivision<Digit, 10>(least)};
+ Digit lq{common::DivideUnsignedBy<Digit, 10>(least)};
Digit lr{least - 10 * lq};
if (r != 0 && lq == q) {
Digit sub{(r - lr) >> 1};
+++ /dev/null
-// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef INT_DIVIDE_H_
-#define INT_DIVIDE_H_
-
-// Work around unoptimized implementations of unsigned integer division
-// by constant values in some compilers (looking at YOU, clang 7!)
-
-#ifdef __clang__
-#if __clang_major__ < 8
-#define USE_INT_DIVIDE_WORKAROUNDS 1
-#endif
-#endif
-
-#include <cinttypes>
-
-namespace Fortran::decimal {
-
-template<typename UINT, UINT DENOM> inline constexpr UINT FastDivision(UINT n) {
- return n / DENOM;
-}
-
-#if USE_INT_DIVIDE_WORKAROUNDS
-// Division by 10**16: multiply by a fixed-point reciprocal in 128 bits, then
-// shift the product right by 64 + 51 bits.
-template<>
-inline constexpr std::uint64_t FastDivision<std::uint64_t, 10000000000000000u>(
-    std::uint64_t n) {
-  return (__uint128_t{0x39a5652fb1137857} * n) >> (64 + 51);
-}
-
-template<>
-inline constexpr std::uint64_t FastDivision<std::uint64_t, 100000000000000u>(
- std::uint64_t n) {
- return (__uint128_t{0xb424dc35095cd81} * n) >> (64 + 42);
-}
-
-template<>
-inline constexpr std::uint32_t FastDivision<std::uint32_t, 1000000u>(
- std::uint32_t n) {
- return (std::uint64_t{0x431bde83} * n) >> (32 + 18);
-}
-
-template<>
-inline constexpr std::uint32_t FastDivision<std::uint32_t, 10000u>(
- std::uint32_t n) {
- return (std::uint64_t{0xd1b71759} * n) >> (32 + 13);
-}
-
-template<>
-inline constexpr std::uint64_t FastDivision<std::uint64_t, 10u>(
- std::uint64_t n) {
- return (__uint128_t{0xcccccccccccccccd} * n) >> (64 + 3);
-}
-
-template<>
-inline constexpr std::uint32_t FastDivision<std::uint32_t, 10u>(
- std::uint32_t n) {
- return (std::uint64_t{0xcccccccd} * n) >> (32 + 3);
-}
-
-template<>
-inline constexpr std::uint64_t FastDivision<std::uint64_t, 5u>(
- std::uint64_t n) {
- return (__uint128_t{0xcccccccccccccccd} * n) >> (64 + 2);
-}
-
-template<>
-inline constexpr std::uint32_t FastDivision<std::uint32_t, 5u>(
- std::uint32_t n) {
- return (std::uint64_t{0xcccccccd} * n) >> (32 + 2);
-}
-#endif
-
-static_assert(
- FastDivision<std::uint64_t, 10000000000000000u>(9999999999999999u) == 0);
-static_assert(
- FastDivision<std::uint64_t, 10000000000000000u>(10000000000000000u) == 1);
-static_assert(
- FastDivision<std::uint64_t, 100000000000000u>(99999999999999u) == 0);
-static_assert(
- FastDivision<std::uint64_t, 100000000000000u>(100000000000000u) == 1);
-static_assert(FastDivision<std::uint32_t, 1000000u>(999999u) == 0);
-static_assert(FastDivision<std::uint32_t, 1000000u>(1000000u) == 1);
-static_assert(FastDivision<std::uint64_t, 10>(18446744073709551615u) ==
- 1844674407370955161u);
-static_assert(FastDivision<std::uint32_t, 10>(4294967295u) == 429496729u);
-static_assert(FastDivision<std::uint64_t, 5>(18446744073709551615u) ==
- 3689348814741910323u);
-static_assert(FastDivision<std::uint32_t, 5>(4294967295u) == 858993459u);
-}
-#endif
#include <cstring>
#include <iostream>
-static constexpr int incr{1}; // steps through all values
-static constexpr bool doNegative{true};
-static constexpr bool doMinimize{true};
+static constexpr int incr{10};  // visits only every 10th value, not all
+static constexpr bool doNegative{false};  // was true
+static constexpr bool doMinimize{false};  // was true
using namespace Fortran::decimal;