From 96d560b84bb61432ac171896162de75a1e2714cb Mon Sep 17 00:00:00 2001 From: peter klausler Date: Mon, 4 Jun 2018 09:49:47 -0700 Subject: [PATCH] [flang] more work on reals Original-commit: flang-compiler/f18@3afe85bf153e39e20fd2562c605167caf48e7cff Reviewed-on: https://github.com/flang-compiler/f18/pull/101 Tree-same-pre-rewrite: false --- flang/lib/evaluate/common.h | 13 ++++ flang/lib/evaluate/real.h | 170 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 178 insertions(+), 5 deletions(-) diff --git a/flang/lib/evaluate/common.h b/flang/lib/evaluate/common.h index c62223e..c558aed 100644 --- a/flang/lib/evaluate/common.h +++ b/flang/lib/evaluate/common.h @@ -53,6 +53,17 @@ static constexpr Relation Reverse(Relation relation) { } } +namespace RealFlag { +enum { + Ok = 0, Overflow = 1, DivideByZero = 2, InvalidArgument = 4, + Underflow = 8, Inexact = 16 +}; +} // namespace RealFlag + +enum class Rounding { + TiesToEven, ToZero, Down, Up, TiesAwayFromZero +}; + #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ constexpr bool IsHostLittleEndian{false}; #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ @@ -61,6 +72,8 @@ constexpr bool IsHostLittleEndian{true}; #error host endianness is not known #endif +// HostUnsignedInt finds the smallest native unsigned integer type +// whose size is >= BITS. template struct SmallestUInt {}; template<> struct SmallestUInt { using type = std::uint8_t; diff --git a/flang/lib/evaluate/real.h b/flang/lib/evaluate/real.h index c0d68b1..dcd96618 100644 --- a/flang/lib/evaluate/real.h +++ b/flang/lib/evaluate/real.h @@ -18,18 +18,26 @@ #include "common.h" #include "integer.h" #include +#include namespace Fortran::evaluate { +// Model IEEE-754 floating-point numbers. The exponent range is that of a +// full int, and all significands are explicitly normalized. // The class template parameter specifies the total number of bits in the -// significand, including any bit that might be implicit in a machine -// representation. +// significand, including the explicit greatest-order bit. template class Real { public: static constexpr int significandBits{SIGNIFICAND_BITS}; static_assert(significandBits > 0); + struct RealResult { + Real value; + int flags{RealFlag::Ok}; + }; + constexpr Real() {} // +0.0 + constexpr Real(const Real &) = default; constexpr Real(std::int64_t n) : significand_{n}, exponent_{64}, negative_{n < 0} { if (negative_) { @@ -45,6 +53,8 @@ public: constexpr bool IsNotANumber() const { return notANumber_; } constexpr bool IsZero() const { return !notANumber_ && significand_.IsZero(); } + constexpr Real &operator=(const Real &) = default; + constexpr Relation Compare(const Real &y) const { if (notANumber_ || y.notANumber_) { return Relation::Unordered; @@ -92,21 +102,171 @@ public: } } + constexpr RealResult Add(const Real &y, Rounding rounding) const { + RealResult result; + if (notANumber_ || y.notANumber_) { + result.value.notANumber_ = true; // NaN + x -> NaN + result.flags = RealFlag::InvalidArgument; + return result; + } + if (infinite_ || y.infinite_) { + if (negative_ == y.negative_) { + result.value.infinite_ = true; // +/-Inf + +/-Inf -> +/-Inf + result.value.negative_ = negative_; + } else { + result.value.notANumber_ = true; // +/-Inf + -/+Inf -> NaN + result.flags = RealFlag::InvalidArgument; + } + return result; + } + if (exponent_ < y.exponent_) { + // y is larger; simplify by reversing + return y.Add(*this, rounding); + } + if (exponent_ == y.exponent_ && + negative_ != y.negative_ && + significand_.CompareUnsigned(y.significand_) == Ordering::Less) { + // Same exponent, opposite signs, and y is larger. + result = y.Add(*this, rounding); + result.negative_ ^= true; + return result; + } + // exponent is greater than or equal to y's + result.value = y; + result.value.exponent_ = exponent_; + result.value.negative_ = negative_; + RoundingBits roundingBits{result.value.ShiftSignificandRight(exponent_ - y.exponent_)}; + if (negative_ != y.negative_) { + ValueWithOverflow negated{result.value.significand_.Negate()}; + if (negated.overflow) { + // y had only its MSB set. Result is our significand, less its MSB. + result.value.significand_ = significand_.IBCLR(significandBits - 1); + } else { + ValueWithCarry diff{significand_.AddUnsigned(negated.value)}; + result.value.significand_ = negated.value; + } + } else { + ValueWithCarry sum{significand_.AddUnsigned(result.value.significand_)}; + if (sum.carry) { + roundingBits.guard |= roundingBits.round; + roundingBits.round = sum.value.BTEST(0); + result.value.significand_ = sum.value.SHIFTR(1).IBSET(significandBits - 1); + ++result.value.exponent_; + } else { + result.value.significand_ = sum.value; + } + } + result.value.Round(rounding, roundingBits); + result.flags |= result.value.Normalize(); + return result; + } + + constexpr RealResult Subtract(const Real &y, Rounding rounding) const { + Real minusy{y}; + minusy.negative_ ^= true; + return Add(minusy, rounding); + } + + constexpr RealResult Multiply(const Real &y, Rounding rounding) const { + RealResult result; + if (notANumber_ || y.notANumber_) { + result.value.notANumber_ = true; // NaN * x -> NaN + result.flags = RealFlag::InvalidArgument; + return result; + } + if (infinite_ || y.infinite_) { + result.value.infinite_ = true; + result.value.negative_ = negative_ != y.negative_; + return result; + } + } + private: using Significand = Integer; using DoubleSignificand = Integer<2 * significandBits>; + static constexpr int maxExponent{std::numeric_limits::max() / 2}; + static constexpr int minExponent{-maxExponent}; + + struct RoundingBits { + RoundingBits() {} + RoundingBits(const RoundingBits &) = default; + RoundingBits &operator(const RoundingBits &) = default; + bool round{false}; + bool guard{false}; // a/k/a "sticky" bit + }; // All values are normalized on output and assumed normal on input. - void Normalize() { - if (!notANumber_ && !infinite_) { + // Returns flag bits. + int Normalize() { + if (notANumber_) { + return RealFlag::InvalidArgument; + } else if (infinite_) { + return RealFlag::Ok; + } else { int shift{significand_.LEADZ()}; if (shift >= significandBits) { exponent_ = 0; // +/-0.0 + return RealFlag::Ok; } else { exponent_ -= shift; - significand_ = significand_.SHIFTL(shift); + if (exponent_ < minExponent) { + exponent_ = 0; + significand_ = Significand{}; + return RealFlag::Underflow; + } else if (exponent_ > maxExponent) { + infinite_ = true; + return RealFlag::Overflow; + } else { + if (shift > 0) { + significand_ = significand_.SHIFTL(shift); + } + return RealFlag::Ok; + } + } + } + } + + constexpr bool MustRound(Rounding rounding, const RoundingBits &bits) const { + switch (rounding) { + case Rounding::TiesToEven: + return bits.round && !bits.guard && significand_.BTEST(0); + case Rounding::ToZero: + return false; + case Rounding::Down: + return negative_ && (bits.round || bits.guard); + case Rounding::Up: + return !negative_ && (bits.round || bits.guard); + case Rounding::TiesAwayFromZero: + return bits.round && !bits.guard; + } + } + + void Round(Rounding rounding, const RoundingBits &bits) { + if (MustRound(rounding, bits)) { + ValueWithCarry sum{significand_.AddUnsigned(Significand{}, true)}; + if (sum.carry) { + // significand was all ones, and we rounded + ++exponent_; + significand_ = sum.value.SHIFTR(1).IBSET(significandBits - 1); + } else { + significand_ = sum.value; + } + } + } + + RoundingBits ShiftSignificandRight(int places) { + RoundingBits result; + if (places > bits) { + result.guard = !significand_.IsZero(); + significand_.Clear(); + } else if (places > 0) { + if (places > 1) { + result.guard = significand_.TRAILZ() + 1 < places; } + result.round = significand_.BTEST(places - 1); + significand_ = significand_.SHIFTR(places); } + return result; } Significand significand_{}; -- 2.7.4