// "Parity is for farmers." -- Seymour R. Cray
-template<typename UINT> inline constexpr int TrailingZeroCount(UINT x) {
+template<typename UINT> inline constexpr int TrailingZeroBitCount(UINT x) {
return BitPopulationCount(x ^ (x - 1)) - !!x;
}
}
if (bits_ == 0) {
return std::nullopt;
} else {
- return {TrailingZeroCount(bits_)};
+ return {TrailingZeroBitCount(bits_)};
}
}
namespace Fortran::evaluate::value {
-static constexpr std::ostream *debug{nullptr};
-
-template<typename REAL>
-std::ostream &Decimal<REAL>::Dump(std::ostream &o) const {
+template<typename REAL, int LOG10RADIX>
+std::ostream &Decimal<REAL, LOG10RADIX>::Dump(std::ostream &o) const {
if (isNegative_) {
o << '-';
}
return o << " e" << exponent_ << '\n';
}
-template<typename REAL> void Decimal<REAL>::FromReal(const REAL &x) {
+template<typename REAL, int LOG10RADIX>
+auto Decimal<REAL, LOG10RADIX>::FromReal(const REAL &x) -> Decimal & {
if (x.IsNegative()) {
FromReal(x.Negate());
isNegative_ = true;
- return;
- }
- if (debug) {
- *debug << "FromReal(" << x.DumpHexadecimal() << ") bits " << Real::bits
- << '\n';
+ return *this;
}
if (x.IsZero()) {
- return;
+ return SetToZero();
}
int twoPow{x.UnbiasedExponent()};
twoPow -= Real::bits - 1;
- if (debug) {
- *debug << "initial twoPow " << twoPow << '\n';
+ if (!Real::implicitMSB) {
+ ++twoPow;
}
int lshift{x.exponentBits};
if (twoPow <= -lshift) {
lshift += twoPow;
twoPow = 0;
}
- if (debug) {
- *debug << "second twoPow " << twoPow << ", lshift " << lshift << '\n';
- }
using Word = typename Real::Word;
Word word{Word::ConvertUnsigned(x.GetFraction()).value};
SetTo(word.SHIFTL(lshift));
- if (debug) {
- Dump(*debug);
- }
for (; twoPow > 0 && IsDivisibleBy<5>(); --twoPow) {
DivideBy<5>();
Normalize();
++exponent_;
- if (debug) {
- Dump(*debug << "/5 ");
- }
}
// Scale by factors of 8, then by 2.
static constexpr int fastForward{1 << log2FastForward};
for (; twoPow >= log2FastForward; twoPow -= log2FastForward) {
MultiplyBy<fastForward>();
- if (debug) {
- Dump(*debug << '*' << fastForward << ' ');
- }
}
for (; twoPow > 0; --twoPow) {
MultiplyBy<2>();
- if (debug) {
- Dump(*debug << "*2 ");
- }
}
for (; twoPow <= -log2FastForward && IsDivisibleBy<fastForward>();
twoPow += log2FastForward) {
DivideBy<fastForward>();
Normalize();
- if (debug) {
- Dump(*debug << '/' << fastForward << ' ');
- }
}
for (; twoPow < 0 && IsDivisibleBy<2>(); ++twoPow) {
DivideBy<2>();
Normalize();
- if (debug) {
- Dump(*debug << "/2 ");
- }
}
for (; twoPow < 0; ++twoPow) {
MultiplyBy<5>();
--exponent_;
- if (debug) {
- Dump(*debug << "*5 ");
- }
}
+ return *this;
}
-// Represents an unrounded binary floating-point
-// value with an unbiased (signed) binary exponent.
+// Local utility class: represents an unrounded binary
+// floating-point value with an unbiased (i.e., signed)
+// binary exponent.
template<typename REAL> class IntermediateFloat {
public:
using Real = REAL;
using Word = typename Real::Word;
- void SetTo(std::uint64_t n) {
- if constexpr (Word::bits >= 8 * sizeof n) {
+ template<typename UINT> void SetTo(UINT n) {
+ static constexpr int nBits{CHAR_BIT * sizeof n};
+ if constexpr (Word::bits >= nBits) {
word_ = n;
} else {
- int shift{64 - LeadingZeroBitCount(n) - Word::bits};
+ int shift{nBits - LeadingZeroBitCount(n) - Word::bits};
if (shift <= 0) {
word_ = n;
} else {
word_ = n >> shift;
exponent_ += shift;
- bool sticky{n << (64 - shift) != 0};
+ bool sticky{n << (nBits - shift) != 0};
if (sticky) {
word_ = word_.IOR(Word{1});
}
void AdjustExponent(int by) { exponent_ += by; }
ValueWithRealFlags<Real> ToReal(
- bool isNegative = false, Rounding rounding = Rounding::TiesToEven) const;
+ bool isNegative = false, Rounding rounding = defaultRounding) const;
private:
Word word_{0};
return o << "0x" << word_.Hexadecimal() << " *2**" << exponent_;
}
+template<typename REAL> REAL MakePowerOfTwo(int exponent) {
+ auto raw{typename REAL::Word{exponent}.SHIFTL(REAL::significandBits)};
+ if (!REAL::implicitMSB) {
+ raw = raw.IBSET(REAL::significandBits - 1);
+ }
+ return REAL{raw};
+}
+
template<typename REAL>
ValueWithRealFlags<REAL> IntermediateFloat<REAL>::ToReal(
bool isNegative, Rounding rounding) const {
Word sticky{word_.IAND(Word{1})};
shifted.word_ = word_.SHIFTR(1).IOR(sticky);
shifted.exponent_ = exponent_ + 1;
- if (debug) {
- shifted.Dump(*debug << "IntermediateFloat::ToReal: shifted: ") << '\n';
- }
return shifted.ToReal(isNegative, rounding);
}
ValueWithRealFlags<Real> result;
} else {
result = Real::FromInteger(word_, rounding);
}
- if (debug) {
- *debug << "IntermediateFloat::ToReal: after FromInteger: "
- << result.value.DumpHexadecimal() << " * 2**" << exponent_ << '\n';
- }
int expo{exponent_};
while (expo + Real::exponentBias < 1) {
- Real twoPow{Word{1}.SHIFTL(Real::significandBits)}; // min normal value
+ Real twoPow{MakePowerOfTwo<Real>(1)}; // min normal value
result.value = result.value.Multiply(twoPow).AccumulateFlags(result.flags);
expo += Real::exponentBias - 1;
- if (debug) {
- *debug << "IntermediateFloat::ToReal: reduced: "
- << result.value.DumpHexadecimal() << " * 2**" << expo << '\n';
- }
}
while (expo + Real::exponentBias >= Real::maxExponent) {
- Real twoPow{Word{Real::maxExponent - 1}.SHIFTL(Real::significandBits)};
+ Real twoPow{MakePowerOfTwo<Real>(Real::maxExponent - 1)};
result.value = result.value.Multiply(twoPow).AccumulateFlags(result.flags);
expo += Real::maxExponent - 1 - Real::exponentBias;
- if (debug) {
- *debug << "IntermediateFloat::ToReal: magnified: "
- << result.value.DumpHexadecimal() << " * 2**" << expo << '\n';
- }
- }
- Real twoPow{Word{expo + Real::exponentBias}.SHIFTL(Real::significandBits)};
- if (debug) {
- *debug << "IntermediateFloat::ToReal: twoPow: " << twoPow.DumpHexadecimal()
- << '\n';
}
+ Real twoPow{MakePowerOfTwo<Real>(expo + Real::exponentBias)};
result.value = result.value.Multiply(twoPow).AccumulateFlags(result.flags);
return result;
}
-template<typename REAL>
-ValueWithRealFlags<REAL> Decimal<REAL>::ToReal(
+template<typename REAL, int LOG10RADIX>
+ValueWithRealFlags<REAL> Decimal<REAL, LOG10RADIX>::ToReal(
const char *&p, Rounding rounding) {
- if (debug) {
- *debug << "ToReal('" << p << "')\n";
- }
while (*p == ' ') {
++p;
}
SetToZero();
- digitLimit_ = maxDigits;
+ digitLimit_ = maxDigits - 1;
isNegative_ = *p == '-';
if (*p == '-' || *p == '+') {
++p;
--exponent_;
}
}
- if (debug) Dump(*debug << "ToReal in loop, p at '" << p << "'\n'");
}
switch (*p) {
}
}
- if (debug) {
- Dump(*debug << "ToReal start, p at '" << p << "'\n");
- }
if (IsZero()) {
ValueWithRealFlags<Real> result;
if (isNegative_) {
return result;
}
- // At this point, *this holds a multi-precision base-quintillion
- // integer with its radix point to the right of its digits,
- // and "exponent_" is the power of ten by which it is to be scaled.
+ // At this point, *this holds a multi-precision integer value in a radix
+ // of a large power of ten. Its radix point is defined to be to the right
+ // of its digits, and "exponent_" is the power of ten by which it is to
+ // be scaled.
IntermediateFloat<Real> f;
// Avoid needless rounding by scaling the value down by a multiple of two
// to make it odd.
- while (digits_ > 0 && (digit_[0] & 1) == 0) {
+ Normalize();
+ while (digits_ > 1 && (digit_[0] & 1) == 0) {
f.AdjustExponent(1);
DivideBy<2>();
}
- Normalize();
- if (debug) {
- Dump(f.Dump(*debug << "made odd ") << '\n');
+ if (digits_ == 1) {
+ int shift{common::TrailingZeroBitCount(digit_[0])};
+ f.AdjustExponent(shift);
+ digit_[0] >>= shift;
}
+ Normalize();
if (exponent_ < 0) {
// If the number were to be represented in decimal and scaled,
// there would be decimal digits to the right of the decimal point.
- // Align that decimal exponent to be a multiple of log10(quintillion)
- // so that the base-quintillion digits can be viewed as having an
- // effective radix point that's meaningful.
- int align{-exponent_ % log10Quintillion};
+ // Align that decimal exponent to be a multiple of log10(radix) so
+ // that the digits can be viewed as having an effective radix point.
+ int align{-exponent_ % log10Radix};
if (align > 0) {
- for (; align < log10Quintillion; ++align) {
+ digitLimit_ = maxDigits;
+ for (; align < log10Radix; ++align) {
--exponent_;
- MultiplyBy<5>();
f.AdjustExponent(1);
- }
- if (debug) {
- Dump(f.Dump(*debug << "aligned ") << '\n');
+ int carry{MultiplyBy<5>()};
+ CHECK(carry == 0);
}
}
}
// Transfer the integer part, if any, to the floating-point
// result. The most significant digit can be moved directly;
// lesser-order digits require transfer of carries.
- if (exponent_ >= -(digits_ - 1) * log10Quintillion) {
- if (debug) {
- Dump(f.Dump(*debug << "converting integer part ") << '\n');
- }
+ if (exponent_ >= -(digits_ - 1) * log10Radix) {
f.SetTo(digit_[--digits_]);
- if (debug) {
- Dump(f.Dump(*debug << "after top digit ") << '\n');
- }
- while (exponent_ > -digits_ * log10Quintillion) {
+ while (exponent_ > -digits_ * log10Radix) {
digitLimit_ = digits_;
int carry{MultiplyBy<10>()};
f.MultiplyAndAdd(10, carry);
--exponent_;
- if (debug) {
- Dump(f.Dump(*debug << "foor of loop after carry " << carry << ": ")
- << '\n');
- }
}
}
// in higher order positions as carries take place.
// Once exponent_ is zero, the carries will then be
// appended to the floating-point result.
- exponent_ += digits_ * log10Quintillion;
- if (debug) {
- Dump(f.Dump(*debug << "after converting integer part ") << '\n');
- }
+ exponent_ += digits_ * log10Radix;
// Convert the remaining fraction into bits of the
// resulting floating-point value until we run out of
// room.
while (!f.IsFull() && !IsZero()) {
- if (debug) {
- Dump(f.Dump(*debug << "step ") << '\n');
- }
f.AdjustExponent(-1);
digitLimit_ = digits_;
std::uint32_t carry = MultiplyBy<2>();
RemoveLeastOrderZeroDigits();
if (carry != 0) {
if (exponent_ < 0) {
- exponent_ += log10Quintillion;
+ exponent_ += log10Radix;
digit_[digits_++] = carry;
carry = 0;
}
}
f.MultiplyAndAdd(2, carry);
}
- if (debug) {
- Dump(f.Dump(*debug << "after converting fraction ") << '\n');
- }
return f.ToReal(isNegative_, rounding);
}
-template<typename REAL>
-std::string Decimal<REAL>::ToString(int maxDigits) const {
+template<typename REAL, int LOG10RADIX>
+std::string Decimal<REAL, LOG10RADIX>::ToString(int maxDigits) const {
std::string result;
if (isNegative_) {
result += '-';
std::string d{std::to_string(digit_[digits_ - 1])};
for (int j{digits_ - 2}; j >= 0; --j) {
auto part{std::to_string(digit_[j])};
- unsigned zeroes = log10Quintillion - part.size();
+ unsigned zeroes = log10Radix - part.size();
d += std::string(zeroes, '0');
d += part;
}
return result;
}
+template<typename REAL, int LOG10RADIX>
+std::string Decimal<REAL, LOG10RADIX>::ToMinimalString(
+ const Real &x, Rounding rounding) const {
+ for (int digits{1};; ++digits) {
+ std::string result{ToString(digits)};
+ const char *p{result.data()};
+ ValueWithRealFlags<Real> readBack{Decimal{}.ToReal(p, rounding)};
+ if (x.Compare(readBack.value) == Relation::Equal) {
+ return result;
+ }
+ }
+}
+
template class Decimal<Real<Integer<16>, 11>>;
template class Decimal<Real<Integer<32>, 24>>;
template class Decimal<Real<Integer<64>, 53>>;
#include "common.h"
#include "integer.h"
+#include "leading-zero-bit-count.h"
#include "real.h"
#include <cinttypes>
#include <limits>
// This is a helper class for use in floating-point conversions
// to and from decimal representations. It holds a multiple-precision
-// integer value using digits in base 10**18 (one quintillion). This
-// radix is the largest power of ten such that 10 times that value will
-// fit in an unsigned 64-bit binary integer. It is accompanied by a
-// signed exponent that denotes multiplication by a power of ten.
+// integer value using digits in radix that is a large power of ten.
+// (A radix of 10**18 (one quintillion) is the largest that could be used
+// because this radix is the largest power of ten such that 10 times that
+// value will fit in an unsigned 64-bit binary integer; a radix of 10**8,
+// however, runs faster since unsigned 32-bit division by a constant can be
+// performed cheaply.) The digits are accompanied by a signed exponent
+// that denotes multiplication by a power of ten.
//
// The operations supported by this class are limited to those required
// for conversions between binary and decimal representations; it is not
namespace Fortran::evaluate::value {
-template<typename REAL> class Decimal {
+static constexpr std::uint64_t TenToThe(int power) {
+ return power <= 0 ? 1 : 10 * TenToThe(power - 1);
+}
+
+// The default radix is 10**8 (100,000,000) for best
+// performance.
+template<typename REAL, int LOG10RADIX = 8> class Decimal {
private:
- using Digit = std::uint64_t;
using Real = REAL;
-
- // 10 * quintillion must not overflow a 64-bit unsigned integer
- static constexpr int log10Quintillion{18};
- static constexpr Digit quintillion{
- static_cast<Digit>(1000000) * 1000000 * 1000000};
- static_assert(quintillion < std::numeric_limits<Digit>::max() / 10,
- "10**18 is too big somehow");
- static_assert(quintillion > std::numeric_limits<Digit>::max() / 100,
- "10**18 is too small somehow");
+ static constexpr int log10Radix{LOG10RADIX};
+ static constexpr std::uint64_t uint64Radix{TenToThe(log10Radix)};
+ static constexpr int minDigitBits{64 - LeadingZeroBitCount(uint64Radix)};
+ using Digit = HostUnsignedInt<minDigitBits>;
+ static constexpr Digit radix{uint64Radix};
+ static_assert(radix < std::numeric_limits<Digit>::max() / 10,
+ "radix is too big somehow");
+ static_assert(radix > std::numeric_limits<Digit>::max() / 100,
+ "radix is too small somehow");
// The base-2 logarithm of the least significant bit that can arise
- // in a subnormal IEEE floating-point number.
+ // in a subnormal IEEE floating-point number. Padded up to make room
+ // for scaling for alignment in decimal->binary conversion.
static constexpr int minLog2AnyBit{
-static_cast<int>(Real::exponentBias) - Real::precision};
- static constexpr int maxDigits{2 - minLog2AnyBit / log10Quintillion};
+ static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix};
public:
Decimal() {}
- void SetToZero() {
+ Decimal &SetToZero() {
isNegative_ = false;
digits_ = 0;
- first_ = 0;
exponent_ = 0;
+ return *this;
}
- void FromReal(const Real &);
+ Decimal &FromReal(const Real &);
// Convert a character representation of a floating-point value to
// the underlying Real type. The reference argument is a pointer that
// is left pointing to the first character that wasn't included.
ValueWithRealFlags<Real> ToReal(
- const char *&, Rounding rounding = Rounding::TiesToEven);
+ const char *&, Rounding rounding = defaultRounding);
+ // ToString() emits the mathematically exact decimal representation
+ // in scientific notation. ToMinimalString() emits the shortest
+ // decimal representation that reads back to the original value.
std::string ToString(int maxDigits = 1000000) const;
+ std::string ToMinimalString(
+ const Real &, Rounding rounding = defaultRounding) const;
private:
std::ostream &Dump(std::ostream &) const;
bool IsZero() const {
- for (int j{first_}; j < digits_; ++j) {
+ for (int j{0}; j < digits_; ++j) {
if (digit_[j] != 0) {
return false;
}
return true;
}
+ // Predicate: true when 10*value would cause a carry
bool IsFull() const {
- return digits_ == digitLimit_ && 10 * digit_[digits_ - 1] >= quintillion;
+ return digits_ == digitLimit_ && 10 * digit_[digits_ - 1] >= radix;
}
+ // Sets this value to that of an Integer<> instance.
+ // Returns any remainder (usually zero).
template<typename INT> INT SetTo(INT n) {
SetToZero();
while (!n.IsZero()) {
++exponent_;
n = qr.quotient;
}
- if constexpr (INT::bits < 60) {
- // n is necessarily less than a quintillion
+ if constexpr (INT::bits < minDigitBits) {
+ // n is necessarily small enough to fit into a digit
if (!n.IsZero()) {
digit_[digits_++] = n.ToUInt64();
}
- return 0;
+ return {};
} else {
while (!n.IsZero() && digits_ < digitLimit_) {
- auto qr{n.DivideUnsigned(quintillion)};
+ auto qr{n.DivideUnsigned(radix)};
digit_[digits_++] = qr.remainder.ToUInt64();
- if (digits_ == first_ + 1 && digit_[first_] == 0) {
- ++first_;
- }
n = qr.quotient;
}
return n;
}
int RemoveLeastOrderZeroDigits() {
- int removed{0};
- while (first_ < digits_ && digit_[first_] == 0) {
- ++first_;
- ++removed;
+ int remove{0};
+ while (remove < digits_ && digit_[remove] == 0) {
+ ++remove;
}
- if (first_ == digits_) {
- first_ = digits_ = 0;
+ if (remove >= digits_) {
+ digits_ = 0;
+ return remove;
}
- return removed;
+ if (remove > 0) {
+ for (int j{0}; j + remove < digits_; ++j) {
+ digit_[j] = digit_[j + remove];
+ }
+ digits_ -= remove;
+ }
+ return remove;
}
void Normalize() {
while (digits_ > 0 && digit_[digits_ - 1] == 0) {
--digits_;
}
- exponent_ += RemoveLeastOrderZeroDigits() * log10Quintillion;
+ exponent_ += RemoveLeastOrderZeroDigits() * log10Radix;
}
- // This limited divisibility test only works for even divisors of 10**18,
+ // This limited divisibility test only works for even divisors of the radix,
// which is fine since it's only used with 2 and 5.
template<int N> bool IsDivisibleBy() const {
- static_assert(N > 1 && quintillion % N == 0, "bad modulus");
- return digits_ == first_ || (digit_[first_] % N) == 0;
+ static_assert(N > 1 && radix % N == 0, "bad modulus");
+ return digits_ == 0 || (digit_[0] % N) == 0;
}
- template<int N> int DivideBy() {
+ template<int DIVISOR> int DivideBy() {
int remainder{0};
for (int j{digits_ - 1}; j >= 0; --j) {
- if (j < first_) {
- if (remainder == 0) {
- break;
- }
- first_ = j;
- }
- int nrem = digit_[j] % N;
- digit_[j] /= N;
- digit_[j] += (quintillion / N) * remainder;
+ // N.B. Because DIVISOR is a constant, these operations should be cheap.
+ int nrem = digit_[j] % DIVISOR;
+ digit_[j] /= DIVISOR;
+ digit_[j] += (radix / DIVISOR) * remainder;
remainder = nrem;
}
return remainder;
}
template<int N> int MultiplyBy(int carry = 0) {
- for (int j{first_}; j < digits_; ++j) {
+ for (int j{0}; j < digits_; ++j) {
digit_[j] = N * digit_[j] + carry;
- carry = digit_[j] / quintillion;
- digit_[j] %= quintillion;
- if (j == first_ && digit_[j] == 0) {
- ++first_;
- }
+ carry = digit_[j] / radix; // N.B. radix is constant, this is fast
+ digit_[j] %= radix;
}
if (carry != 0) {
if (digits_ < digitLimit_) {
return carry;
}
- Digit digit_[maxDigits]; // base-quintillion digits in little-endian order
+ Digit digit_[maxDigits]; // in little-endian order
int digits_{0}; // significant elements in digit_[] array
- int first_{0}; // digits below this are all zero
int digitLimit_{maxDigits}; // clamp
int exponent_{0}; // signed power of ten
bool isNegative_{false};
}
template<typename W, int P, bool IM>
-std::ostream &Real<W, P, IM>::AsFortran(std::ostream &o, int kind) const {
+std::ostream &Real<W, P, IM>::AsFortran(
+ std::ostream &o, int kind, Rounding rounding) const {
if (IsNotANumber()) {
o << "(0._" << kind << "/0.)";
} else if (IsInfinite()) {
o << "(";
}
Decimal<Real> decimal;
- decimal.FromReal(*this);
- o << decimal.ToString();
- o << '_' << kind;
+ o << decimal.FromReal(*this).ToString() << '_' << kind;
if (parenthesize) {
o << ')';
}
// Emits a character representation for an equivalent Fortran constant
// or parenthesized constant expression that produces this value.
- std::ostream &AsFortran(std::ostream &, int kind) const;
+ std::ostream &AsFortran(
+ std::ostream &, int kind, Rounding rounding = defaultRounding) const;
private:
using Significand = Integer<significandBits>; // no implicit bit
}
template<typename R> void basicTests(int rm, Rounding rounding) {
+ static constexpr int kind{R::bits / 8};
char desc[64];
using Word = typename R::Word;
- std::snprintf(
- desc, sizeof desc, "bits=%d, le=%d", R::bits, Word::littleEndian);
+ std::snprintf(desc, sizeof desc, "bits=%d, le=%d, kind=%d", R::bits,
+ Word::littleEndian, kind);
R zero;
TEST(!zero.IsNegative())(desc);
TEST(!zero.IsNotANumber())(desc);
TEST(!vr.value.IsInfinite())(ldesc);
TEST(ivf.flags.empty())(ldesc);
MATCH(x, ivf.value.ToUInt64())(ldesc);
+ std::stringstream ss;
+ vr.value.AsFortran(ss, kind, rounding);
+ std::string decimal{ss.str()};
+ const char *p{decimal.data()};
+ char ddesc[128];
+ std::snprintf(ddesc, sizeof ddesc, "%s decimal='%s'", ldesc, p);
+ MATCH(x, static_cast<std::uint64_t>(std::stold(decimal)))(ddesc);
+ auto check{R::Read(p, rounding)};
+ auto icheck{check.value.template ToInteger<Integer8>()};
+ MATCH(x, icheck.value.ToUInt64())(ddesc);
+ TEST(vr.value.Compare(check.value) == Relation::Equal)(ddesc);
}
TEST(vr.value.AINT().value.Compare(vr.value) == Relation::Equal)(ldesc);
ix = ix.Negate().value;
MATCH(IsInfinite(rj), x.IsInfinite())
("%d IsInfinite(0x%llx)", pass, static_cast<long long>(rj));
- int kind{REAL::bits / 8};
+ static constexpr int kind{REAL::bits / 8};
std::stringstream ss, css;
- x.AsFortran(ss, kind);
+ x.AsFortran(ss, kind, rounding);
std::string s{ss.str()};
if (IsNaN(rj)) {
css << "(0._" << kind << "/0.)";