}
bool isNegative{IsNegative()};
bool yIsNegative{y.IsNegative()};
- if (IsInfinite() || y.IsInfinite()) {
- if (isNegative == yIsNegative) {
- result.value = *this; // +/-Inf + +/-Inf -> +/-Inf
+ if (IsInfinite()) {
+ if (y.IsInfinite()) {
+ if (isNegative == yIsNegative) {
+ result.value = *this; // +/-Inf + +/-Inf -> +/-Inf
+ } else {
+ result.value.word_ = NaNWord(); // +/-Inf + -/+Inf -> NaN
+ result.flags.set(RealFlag::InvalidArgument);
+ }
} else {
- result.value.word_ = NaNWord(); // +/-Inf + -/+Inf -> NaN
- result.flags.set(RealFlag::InvalidArgument);
+ result.value = *this; // +/-Inf + x -> +/-Inf
}
return result;
}
+ if (y.IsInfinite()) {
+ result.value = y; // x + +/-Inf -> +/-Inf
+ return result;
+ }
std::uint64_t exponent{Exponent()};
std::uint64_t yExponent{y.Exponent()};
if (exponent < yExponent) {
fraction = sum.value;
if (isNegative == yIsNegative && sum.carry) {
roundingBits.ShiftRight(sum.value.BTEST(0));
- fraction = fraction.SHIFTR(1).IBSET(precision - 1);
+ fraction = fraction.SHIFTR(1).IBSET(fraction.bits - 1);
++exponent;
}
result.flags |=
} else {
bool isNegative{IsNegative() != y.IsNegative()};
if (IsInfinite() || y.IsInfinite()) {
- result.value.Normalize(isNegative, maxExponent, Fraction{});
+ if (IsZero() || y.IsZero()) {
+ result.value.word_ = NaNWord(); // 0 * Inf -> NaN
+ result.flags.set(RealFlag::InvalidArgument);
+ } else {
+ result.value.Normalize(isNegative, maxExponent, Fraction{});
+ }
} else {
auto product = GetFraction().MultiplyUnsigned(y.GetFraction());
- std::uint64_t exponent{Exponent() + y.Exponent() - exponentBias};
- result.flags |=
- result.value.Normalize(isNegative, exponent, product.upper);
- result.flags |= result.value.Round(
- rounding, RoundingBits{product.lower, precision});
+ std::int64_t exponent = Exponent(), yExponent = y.Exponent();
+ // A zero exponent field value has the same weight as 1.
+ exponent += !exponent;
+ yExponent += !yExponent;
+ exponent += yExponent;
+ exponent -= exponentBias;
+ ++exponent;
+ if (exponent < 1) {
+ int rshift = 1 - exponent;
+ exponent = 1;
+ bool sticky{false};
+ if (rshift >= product.upper.bits + product.lower.bits) {
+ sticky = !product.lower.IsZero() || !product.upper.IsZero();
+ } else if (rshift >= product.lower.bits) {
+ sticky = !product.lower.IsZero();
+ } else {
+ sticky = !product.lower.IAND(product.lower.MASKR(rshift)).IsZero();
+ }
+ product.lower = product.lower.DSHIFTR(product.upper, rshift);
+ product.upper = product.upper.SHIFTR(rshift);
+ if (sticky) {
+ product.lower = product.lower.IBSET(0);
+ }
+ }
+ int leadz{product.upper.LEADZ()};
+ if (leadz >= product.upper.bits) {
+ leadz += product.lower.LEADZ();
+ }
+ int lshift{leadz};
+ if (lshift > exponent - 1) {
+ lshift = exponent - 1;
+ }
+ exponent -= lshift;
+ product.upper = product.upper.DSHIFTL(product.lower, lshift);
+ product.lower = product.lower.SHIFTL(lshift);
+ RoundingBits roundingBits{product.lower, product.upper.bits};
+ result.flags |= result.value.Normalize(
+ isNegative, exponent, product.upper, &roundingBits);
+ result.flags |= result.value.Round(rounding, roundingBits);
}
}
return result;
.IBSET(significandBits - 2);
}
- constexpr RealFlags Normalize(bool negative, std::uint64_t biasedExponent,
+ constexpr RealFlags Normalize(bool negative, std::uint64_t exponent,
const Fraction &fraction, RoundingBits *roundingBits = nullptr) {
- if (biasedExponent >= maxExponent) {
- word_ = Word{maxExponent}.SHIFTL(significandBits);
+ if (exponent >= maxExponent) {
+ word_ = Word{maxExponent}.SHIFTL(significandBits); // Inf
if (negative) {
word_ = word_.IBSET(bits - 1);
}
return {RealFlag::Overflow};
+ }
+ if (fraction.BTEST(fraction.bits - 1)) {
+ // fraction is normalized
+ word_ = Word::Convert(fraction).value;
+ if (exponent == 0) {
+ exponent = 1;
+ }
} else {
std::uint64_t lshift = fraction.LEADZ();
- if (lshift >= precision) {
+ if (lshift >= fraction.bits) {
// +/-0.0
word_ = Word{};
+ exponent = 0;
} else {
word_ = Word::Convert(fraction).value;
- if (lshift < biasedExponent) {
- biasedExponent -= lshift;
- } else if (biasedExponent > 0) {
- lshift = biasedExponent - 1;
- biasedExponent = 0;
+ if (lshift < exponent) {
+ exponent -= lshift;
+ } else if (exponent > 0) {
+ lshift = exponent - 1;
+ exponent = 0;
+ } else if (lshift == 0) {
+ exponent = 1;
} else {
lshift = 0;
}
}
}
}
- if (implicitMSB) {
- word_ = word_.IBCLR(significandBits);
- }
- word_ = word_.IOR(Word{biasedExponent}.SHIFTL(significandBits));
- }
- if (negative) {
- word_ = word_.IBSET(bits - 1);
}
- return {};
}
+ if (implicitMSB) {
+ word_ = word_.IBCLR(significandBits);
+ }
+ word_ = word_.IOR(Word{exponent}.SHIFTL(significandBits));
+ if (negative) {
+ word_ = word_.IBSET(bits - 1);
+ }
+ return {};
}
// Rounds a result, if necessary.
}
}
-// Takes a 12-bit number and distributes its bits across a 32-bit single
+// Takes a 13-bit number and distributes its bits across a 32-bit single
// precision real. All sign and exponent bit positions are tested, plus
-// the upper two bits and lowest bit in the significand.
+// the upper two bits and lowest bit in the significand. The middle bits
+// of the significand are either all zeroes or all ones.
+// Bit mapping: n[12:2] -> result[31:21] (sign, exponent, and the upper
+// two significand bits), n[1] -> result[0], and n[0] is replicated
+// across result[20:1].
std::uint32_t MakeReal(std::uint32_t n) {
- return (n << 23) | (n >> 11) | ((n & 6) << 20);
+ // Shift by 19, not 20: a 20-bit shift would push n[12] out of the
+ // 32-bit word and leave significand bit 21 permanently clear.
+ return ((n & 0x1ffc) << 19) | !!(n & 2) | ((-(n & 1) & 0xfffff) << 1);
}
std::uint32_t NormalizeNaN(std::uint32_t x) {
std::uint32_t u32;
float f;
} u;
- for (std::uint32_t j{0}; j < 4096; ++j) {
+ for (std::uint32_t j{0}; j < 8192; ++j) {
std::uint32_t rj{MakeReal(j)};
u.u32 = rj;
float fj{u.f};
RealKind4 x{Integer<32>{std::uint64_t{rj}}};
- for (std::uint32_t k{0}; k < 4096; ++k) {
+ for (std::uint32_t k{0}; k < 8192; ++k) {
std::uint32_t rk{MakeReal(k)};
u.u32 = rk;
float fk{u.f};
std::uint32_t check = diff.value.RawBits().ToUInt64();
MATCH(rcheck, check)("0x%x - 0x%x", rj, rk);
}
-#if 0
{ ValueWithRealFlags<RealKind4> prod{x.Multiply(y)};
ScopedHostFloatingPointEnvironment fpenv;
float fcheck{fj * fk};
std::uint32_t check = prod.value.RawBits().ToUInt64();
MATCH(rcheck, check)("0x%x * 0x%x", rj, rk);
}
+#if 0
{ ValueWithRealFlags<RealKind4> quot{x.Divide(y)};
ScopedHostFloatingPointEnvironment fpenv;
float fcheck{fj * fk};
tests<RealKind8>();
tests<RealKind10>();
tests<RealKind16>();
- subset32bit();
+ subset32bit(); // TODO rounding modes, exception flags
return testing::Complete();
}