#include "binary-floating-point.h"
#include "decimal.h"
+#include "int-divide-workaround.h"
#include "../common/bit-population-count.h"
#include "../common/leading-zero-bit-count.h"
#include <cinttypes>
std::is_same_v<UINT, __uint128_t> || std::is_unsigned_v<UINT>);
SetToZero();
while (n != 0) {
- auto q{n / 10};
+ auto q{FastDivision<UINT, 10>(n)};
if (n != 10 * q) {
break;
}
return 0;
} else {
while (n != 0 && digits_ < digitLimit_) {
- auto q{n / radix};
+ auto q{FastDivision<UINT, radix>(n)};
digit_[digits_++] = n - radix * q;
n = q;
}
Digit remainder{0};
for (int j{digits_ - 1}; j >= 0; --j) {
// N.B. Because DIVISOR is a constant, these operations should be cheap.
- Digit q{digit_[j] / DIVISOR};
+ Digit q{FastDivision<Digit, DIVISOR>(digit_[j])};
Digit nrem{digit_[j] - DIVISOR * q};
digit_[j] = q + (radix / DIVISOR) * remainder;
remainder = nrem;
template<int N> int MultiplyByHelper(int carry = 0) {
for (int j{0}; j < digits_; ++j) {
- digit_[j] = N * digit_[j] + carry;
- carry = digit_[j] / radix; // N.B. radix is constant, this is fast
- digit_[j] -= carry * radix;
+ auto v{N * digit_[j] + carry};
+ carry = FastDivision<Digit, radix>(v);
+ digit_[j] = v - carry * radix; // i.e., v % radix
}
return carry;
}
// Treat the MSD specially: don't emit leading zeroes.
Digit dig{digit_[digits_ - 1]};
for (int k{0}; k < LOG10RADIX; k += 2) {
- Digit d{dig / hundredth};
+ Digit d{FastDivision<Digit, hundredth>(dig)};
dig = 100 * (dig - d * hundredth);
const char *q{lut + 2 * d};
if (q[0] != '0' || p > start) {
for (int j{digits_ - 1}; j-- > 0;) {
Digit dig{digit_[j]};
for (int k{0}; k < log10Radix; k += 2) {
- Digit d{dig / hundredth};
+ Digit d{FastDivision<Digit, hundredth>(dig)};
dig = 100 * (dig - d * hundredth);
const char *q = lut + 2 * d;
*p++ = q[0];
Digit least{less.digit_[offset]};
Digit my{digit_[0]};
while (true) {
- Digit q{my / 10};
+ Digit q{FastDivision<Digit, 10>(my)};
Digit r{my - 10 * q};
- Digit lq{least / 10};
+ Digit lq{FastDivision<Digit, 10>(least)};
Digit lr{least - 10 * lq};
if (r != 0 && lq == q) {
Digit sub{(r - lr) >> 1};
if (q == start || (q == start + 1 && *start == '.')) {
return false; // require at least one digit
}
- auto times{radix};
const char *d{q};
+ // Strip off trailing zeroes
if (point != nullptr) {
while (d > firstDigit && d[-1] == '0') {
--d;
}
}
if (d == firstDigit) {
- exponent_ = 0;
+ exponent_ = 0; // all zeros
}
if (point != nullptr) {
exponent_ -= static_cast<int>(d - point - 1);
}
+ // Trim any excess digits
const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)};
if (d > limit) {
inexact = true;
}
}
}
- while (d-- > firstDigit) {
+ // Rack the decimal digits up into big Digits.
+ for (auto times{radix}; d-- > firstDigit;) {
if (*d != '.') {
if (times == radix) {
digit_[digits_++] = *d - '0';
// large power of ten. Its radix point is defined to be to the right of its
// digits, and "exponent_" is the power of ten by which it is to be scaled.
Normalize();
- if (digits_ == 0) {
+ if (digits_ == 0) { // zero value
if (isNegative_) {
using Raw = typename Binary::RawType;
Raw negZero{static_cast<Raw>(1) << (Binary::bits - 1)};
}
}
// The value is not zero.
+ IntermediateFloat<PREC> f;
+#if 0 // actually a small loss
+ // Make the value odd.
+ if (int trailing0s{common::TrailingZeroBitCount(digit_[0])}) {
+ f.AdjustExponent(trailing0s);
+ for (; trailing0s > log10Radix; trailing0s -= log10Radix) {
+ DivideByPowerOfTwo(log10Radix);
+ }
+ DivideByPowerOfTwo(trailing0s);
+ }
+#endif
// Shift our perspective on the radix (& decimal) point so that
// it sits to the *left* of the digits.
exponent_ += digits_ * log10Radix;
// Apply any negative decimal exponent by multiplication
// by a power of two, adjusting the binary exponent to compensate.
- IntermediateFloat<PREC> f;
while (exponent_ < log10Radix) {
f.AdjustExponent(-9);
digitLimit_ = digits_;
--- /dev/null
+// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef INT_DIVIDE_H_
+#define INT_DIVIDE_H_
+
+// Work around unoptimized implementations of unsigned integer division
+// by constant values in some compilers (looking at YOU, clang 7!)
+
+#ifdef __clang__
+#if __clang_major__ < 8
+#define USE_INT_DIVIDE_WORKAROUNDS 1
+#endif
+#endif
+
+#include <cinttypes>
+
+namespace Fortran::decimal {
+
+template<typename UINT, UINT DENOM> inline constexpr UINT FastDivision(UINT n) {
+ return n / DENOM;
+}
+
+#if USE_INT_DIVIDE_WORKAROUNDS
+template<> inline constexpr std::uint64_t FastDivision<std::uint64_t, 10000000000000000u>(std::uint64_t n) {
+ return (static_cast<__uint128_t>(0x39a5652fb1137857) * n) >> (64 + 51);
+}
+
+template<> inline constexpr std::uint64_t FastDivision<std::uint64_t, 100000000000000u>(std::uint64_t n) {
+ return (static_cast<__uint128_t>(0xb424dc35095cd81) * n) >> (64 + 42);
+}
+
+template<> inline constexpr std::uint32_t FastDivision<std::uint32_t, 1000000u>(std::uint32_t n) {
+ return (static_cast<std::uint64_t>(0x431bde83) * n) >> (32 + 18);
+}
+
+template<> inline constexpr std::uint32_t FastDivision<std::uint32_t, 10000u>(std::uint32_t n) {
+ return (static_cast<std::uint64_t>(0xd1b71759) * n) >> (32 + 13);
+}
+
+template<> inline constexpr std::uint64_t FastDivision<std::uint64_t, 10u>(std::uint64_t n) {
+ return (static_cast<__uint128_t>(0xcccccccccccccccd) * n) >> (64 + 3);
+}
+
+template<> inline constexpr std::uint32_t FastDivision<std::uint32_t, 10u>(std::uint32_t n) {
+ return (static_cast<std::uint64_t>(0xcccccccd) * n) >> (32 + 3);
+}
+
+template<> inline constexpr std::uint64_t FastDivision<std::uint64_t, 5u>(std::uint64_t n) {
+ return (static_cast<__uint128_t>(0xcccccccccccccccd) * n) >> (64 + 2);
+}
+
+template<> inline constexpr std::uint32_t FastDivision<std::uint32_t, 5u>(std::uint32_t n) {
+ return (static_cast<std::uint64_t>(0xcccccccd) * n) >> (32 + 2);
+}
+#endif
+
+static_assert(FastDivision<std::uint64_t, 10000000000000000u>(9999999999999999u) == 0);
+static_assert(FastDivision<std::uint64_t, 10000000000000000u>(10000000000000000u) == 1);
+static_assert(FastDivision<std::uint64_t, 100000000000000u>(99999999999999u) == 0);
+static_assert(FastDivision<std::uint64_t, 100000000000000u>(100000000000000u) == 1);
+static_assert(FastDivision<std::uint32_t, 1000000u>(999999u) == 0);
+static_assert(FastDivision<std::uint32_t, 1000000u>(1000000u) == 1);
+static_assert(FastDivision<std::uint64_t, 10>(18446744073709551615u) == 1844674407370955161u);
+static_assert(FastDivision<std::uint32_t, 10>(4294967295u) == 429496729u);
+static_assert(FastDivision<std::uint64_t, 5>(18446744073709551615u) == 3689348814741910323u);
+static_assert(FastDivision<std::uint32_t, 5>(4294967295u) == 858993459u);
+}
+#endif