1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program Tester Core
3 * ----------------------------------------
5 * Copyright 2014 The Android Open Source Project
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
21 * \brief Adjustable-precision floating point operations.
22 *//*--------------------------------------------------------------------*/
24 #include "tcuFloatFormat.hpp"
27 #include "deUniquePtr.hpp"
//! Select between two candidate intervals according to a tri-state `choice`.
//! For MAYBE the result is `no | yes`, i.e. both candidates combined with
//! Interval's operator|. A `choice` that matches no case label trips DE_FATAL.
38 Interval chooseInterval(YesNoMaybe choice, const Interval& no, const Interval& yes)
44 case MAYBE: return no | yes;
45 default: DE_FATAL("Impossible case");
//! Evaluate deLdExp(1.0, maxExp) + deLdExp(2^fractionBits - 1, maxExp - fractionBits).
//! Assuming deLdExp(m, e) yields m * 2^e (TODO confirm), this is the value with
//! exponent maxExp and all fraction bits set. The FloatFormat constructor
//! stores the result in m_maxValue.
//! NOTE(review): `1ull << fractionBits` is only well defined while
//! fractionBits is smaller than the bit width of unsigned long long.
51 double computeMaxValue (int maxExp, int fractionBits)
53 return (deLdExp(1.0, maxExp) +
54 deLdExp(double((1ull << fractionBits) - 1), maxExp - fractionBits));
//! Construct a format description from its exponent range, fraction width,
//! precision flag and tri-state feature flags. The maximum value is derived
//! once via computeMaxValue(maxExp, fractionBits) and kept in m_maxValue.
59 FloatFormat::FloatFormat (int minExp,
63 YesNoMaybe hasSubnormal_,
68 , m_fractionBits (fractionBits)
69 , m_hasSubnormal (hasSubnormal_)
72 , m_exactPrecision (exactPrecision)
73 , m_maxValue (computeMaxValue(maxExp, fractionBits))
// Sanity check: the exponent range must not be inverted.
75 DE_ASSERT(minExp <= maxExp);
78 /*-------------------------------------------------------------------------
79 * On the definition of ULP
81 * The GLSL spec does not define ULP. However, it refers to IEEE 754, which
82 * (reportedly) uses Harrison's definition:
84 * ULP(x) is the distance between the closest floating point numbers
85 * a and b such that a <= x <= b and a != b
87 * Note that this means that when x = 2^n, ULP(x) = 2^(n-p-1) (p = fraction bits),
88 * i.e. it is the distance to the next lower float, not the next higher.
90 * Furthermore, it is assumed that ULP is calculated relative to the exact
91 * value, not the approximation. This is because otherwise a less accurate
92 * approximation could be closer in ULPs, because its ULPs are bigger.
94 * For details, see "On the definition of ulp(x)" by Jean-Michel Muller
96 *-----------------------------------------------------------------------*/
//! Return the size of `count` ULPs of this format at `x`.
//! \param x     Value at which the ULP is evaluated; its magnitude (deAbs(x))
//!              is decomposed into a fraction and an exponent.
//! \param count Factor applied to the size of a single ULP.
//! \return oneULP * count, where oneULP = deLdExp(1.0, exp - m_fractionBits).
98 double FloatFormat::ulp (double x, double count) const
101 const double frac = deFractExp(deAbs(x), &exp);
// An infinite fraction yields the ULP size at the maximum exponent.
105 else if (deIsInf(frac))
106 return deLdExp(1.0, m_maxExp - m_fractionBits);
107 else if (frac == 1.0)
109 // Harrison's ULP: choose distance to closest (i.e. next lower) at binade
113 else if (frac == 0.0)
116 // ULP cannot be lower than the smallest quantum.
117 exp = de::max(exp, m_minExp);
120 const double oneULP = deLdExp(1.0, exp - m_fractionBits);
// The multiplication below runs while `ctx` is alive; presumably this makes
// the product round towards +infinity (confirm ScopedRoundingMode semantics).
121 ScopedRoundingMode ctx (DE_ROUNDINGMODE_TO_POSITIVE_INF);
123 return oneULP * count;
127 //! Return the difference between the given nominal exponent and
128 //! the exponent of the lowest significand bit of the
129 //! representation of a number with this format.
130 //! For normal numbers this is the number of significand bits, but
131 //! for subnormals it is less and for values of exp where 2^exp is too
132 //! small to represent it is <0
//! \param exp Nominal exponent of the number in question.
//! \return m_fractionBits reduced by the amount by which `exp` falls short
//!         of m_minExp (no reduction when exp >= m_minExp).
133 int FloatFormat::exponentShift (int exp) const
135 return m_fractionBits - de::max(m_minExp - exp, 0);
138 //! Return the number closest to `d` that is exactly representable with the
139 //! significand bits and minimum exponent of the floatformat. Round up if
140 //! `upward` is true, otherwise down.
//! \param d      Value to round.
//! \param upward true applies deCeil, false applies deFloor to the scaled fraction.
141 double FloatFormat::round (double d, bool upward) const
144 const double frac = deFractExp(d, &exp);
145 const int shift = exponentShift(exp);
// Scale the fraction by `shift` binary places (deLdExp) so that the lowest
// representable bit sits at the units position, round it to an integer,
// then undo the scaling by applying the exponent exp - shift.
146 const double shiftFrac = deLdExp(frac, shift);
147 const double roundFrac = upward ? deCeil(shiftFrac) : deFloor(shiftFrac);
149 return deLdExp(roundFrac, exp - shift);
152 //! Return the range of numbers that `d` might be converted to in the
153 //! floatformat, given its limitations with infinities, subnormals and maximum
155 Interval FloatFormat::clampValue (double d) const
157 const double rSign = deSign(d);
// NaN is not accepted here.
160 DE_ASSERT(!deIsNaN(d));
162 deFractExp(d, &rExp);
// Candidates: rSign * 0.0 (used when subnormals are unsupported) or d itself
// (used when they are supported); chooseInterval picks per m_hasSubnormal.
164 return chooseInterval(m_hasSubnormal, rSign * 0.0, d);
// Infinite input or an exponent above m_maxExp: the signed maximum value
// (infinities unsupported) or signed infinity (supported), per m_hasInf.
165 else if (deIsInf(d) || rExp > m_maxExp)
166 return chooseInterval(m_hasInf, rSign * getMaxValue(), rSign * TCU_INFINITY);
171 //! Return the range of numbers that might be used with this format to
172 //! represent a number within `x`.
173 Interval FloatFormat::convert (const Interval& x) const
180 // If NaN might be supported, NaN is a legal return value
184 // If NaN might not be supported, any (non-NaN) value is legal,
185 // _subject_ to clamping. Hence we modify tmp, not ret.
187 tmp = Interval::unbounded();
190 // Round both bounds _inwards_ to closest representable values.
// (the lower bound rounds up, the upper bound rounds down; each rounded bound
// then goes through clampValue and is merged into `ret` with operator|)
192 ret |= clampValue(round(tmp.lo(), true)) | clampValue(round(tmp.hi(), false));
194 // If this format's precision is not exact, the (possibly out-of-bounds)
195 // original value is also a possible result.
196 if (!m_exactPrecision)
//! Scalar helper used by the Interval overload of roundOut.
//! \param d                  Value to round.
//! \param upward             Rounding direction, forwarded to round().
//! \param roundUnderOverflow When set, a value whose exponent exceeds m_maxExp
//!                           and that is being rounded towards zero (upward
//!                           for negative d, downward otherwise) yields the
//!                           signed maximum value, deSign(d) * getMaxValue().
//! \return The signed maximum value in the case above, otherwise round(d, upward).
202 double FloatFormat::roundOut (double d, bool upward, bool roundUnderOverflow) const
207 if (roundUnderOverflow && exp > m_maxExp && (upward == (d < 0.0)))
208 return deSign(d) * getMaxValue();
210 return round(d, upward);
213 //! Round output of an operation.
214 //! \param roundUnderOverflow Whether +/-inf may be rounded to min/max representable;
215 //! should be false if any of the operands was inf, true otherwise.
216 Interval FloatFormat::roundOut (const Interval& x, bool roundUnderOverflow) const
// Start from x.nan() (presumably the NaN component of x; confirm in Interval).
218 Interval ret = x.nan();
// The lower bound is rounded down and the upper bound is rounded up, i.e.
// both bounds move outwards before being merged into the result.
221 ret |= Interval(roundOut(x.lo(), false, roundUnderOverflow),
222 roundOut(x.hi(), true, roundUnderOverflow));
//! Format `x` as a hexadecimal floating-point string laid out according to
//! this format's fraction width: an optional '-', "0x", the whole part, '.',
//! the fraction as zero-padded hex digits, 'p' and the decimal exponent.
227 std::string FloatFormat::floatToHex (double x) const
// Infinity is printed with an explicit sign, including '+' for positive.
232 return (x < 0.0 ? "-" : "+") + std::string("inf");
233 else if (x == 0.0) // \todo [2014-03-27 lauri] Negative zero
237 const double frac = deFractExp(deAbs(x), &exp);
238 const int shift = exponentShift(exp);
// Integer holding the significand bits; it is split at bit m_fractionBits
// into the whole part and the fraction part below.
239 const deUint64 bits = deUint64(deLdExp(frac, shift));
240 const deUint64 whole = bits >> m_fractionBits;
241 const deUint64 fraction = bits & ((deUint64(1) << m_fractionBits) - 1);
242 const int exponent = exp + m_fractionBits - shift;
// Number of hex digits needed for the fraction, rounded up to whole nibbles;
// `aligned` shifts the fraction left so it fills those digits from the top.
243 const int numDigits = (m_fractionBits + 3) / 4;
244 const deUint64 aligned = fraction << (numDigits * 4 - m_fractionBits);
245 std::ostringstream oss;
// Finite values only get a sign prefix when negative.
247 oss << (x < 0 ? "-" : "")
248 << "0x" << whole << "."
249 << std::hex << std::setw(numDigits) << std::setfill('0') << aligned
250 << "p" << std::dec << std::setw(0) << exponent;
//! Format an interval as text, using floatToHex for its bounds.
//! - Empty interval: "{ NaN }" if it has NaN, otherwise "{}".
//! - Single point (lo == hi): "{ <value> }", opened with "{ NaN, " when the
//!   interval has NaN.
//! - Interval::unbounded(true) is recognised as a separate case.
//! - Otherwise: "[<lo>, <hi>]", prefixed with "{ NaN } | " when the interval
//!   has NaN.
255 std::string FloatFormat::intervalToHex (const Interval& interval) const
257 if (interval.empty())
258 return interval.hasNaN() ? "{ NaN }" : "{}";
260 else if (interval.lo() == interval.hi())
261 return (std::string(interval.hasNaN() ? "{ NaN, " : "{ ") +
262 floatToHex(interval.lo()) + " }");
263 else if (interval == Interval::unbounded(true))
266 return (std::string(interval.hasNaN() ? "{ NaN } | " : "") +
267 "[" + floatToHex(interval.lo()) + ", " + floatToHex(interval.hi()) + "]");
//! Build a FloatFormat describing the native type T from
//! std::numeric_limits<T>. The radix of T is asserted to be 2.
//! NOTE(review): the constructor appears to take hasSubnormal_ as its fifth
//! parameter, yet the fifth argument below is derived from has_infinity while
//! the has_denorm tri-state is passed last and a has_denorm-derived bool is
//! passed fourth. Confirm the argument order against the constructor's
//! parameter list.
270 template <typename T>
271 static FloatFormat nativeFormat (void)
273 typedef std::numeric_limits<T> Limits;
275 DE_ASSERT(Limits::radix == 2);
277 return FloatFormat(Limits::min_exponent - 1, // These have a built-in offset of one
278 Limits::max_exponent - 1,
279 Limits::digits - 1, // don't count the hidden bit
280 Limits::has_denorm != std::denorm_absent,
281 Limits::has_infinity ? YES : NO,
282 Limits::has_quiet_NaN ? YES : NO,
283 ((Limits::has_denorm == std::denorm_present) ? YES :
284 (Limits::has_denorm == std::denorm_absent) ? NO :
//! Return the format description derived from std::numeric_limits<float>.
288 FloatFormat FloatFormat::nativeFloat (void)
290 return nativeFormat<float>();
//! Return the format description derived from std::numeric_limits<double>.
293 FloatFormat FloatFormat::nativeDouble (void)
295 return nativeFormat<double>();
302 using std::ostringstream;
// Stores the format under test in m_fmt (presumably taking ownership via
// MovePtr/UniquePtr; confirm in deUniquePtr.hpp).
310 Test (MovePtr<FloatFormat> fmt) : m_fmt(fmt) {}
// Shorthand for deLdExp(1.0, e), used to spell out test values.
311 double p (int e) const { return deLdExp(1.0, e); }
// Fails the test when the result differs from the reference; `expr` labels
// the failure message.
312 void check (const string& expr,
314 double reference) const;
// Check m_fmt->ulp(arg) against ref.
315 void testULP (double arg, double ref) const;
// Check m_fmt->round(arg, false) and m_fmt->round(arg, true) against the
// expected round-down and round-up results.
316 void testRound (double arg, double refDown, double refUp) const;
// Format under test.
318 UniquePtr<FloatFormat> m_fmt;
//! Compare `result` with `reference` using exact inequality (!=). On a
//! mismatch, build the message "<expr> returned <result>, expected <reference>"
//! and raise TCU_FAIL with it.
321 void Test::check (const string& expr, double result, double reference) const
323 if (result != reference)
326 oss << expr << " returned " << result << ", expected " << reference;
327 TCU_FAIL(oss.str().c_str());
//! Check that m_fmt->ulp(arg) equals `ref`; the failure label is "ulp(<arg>)".
331 void Test::testULP (double arg, double ref) const
335 oss << "ulp(" << arg << ")";
336 check(oss.str(), m_fmt->ulp(arg), ref);
//! Check both rounding directions for `arg`: round(arg, false) must equal
//! `refDown` and round(arg, true) must equal `refUp`. Each check is labelled
//! with the call it exercises.
339 void Test::testRound (double arg, double refDown, double refUp) const
// Downward rounding.
343 oss << "round(" << arg << ", false)";
344 check(oss.str(), m_fmt->round(arg, false), refDown);
// Upward rounding.
348 oss << "round(" << arg << ", true)";
349 check(oss.str(), m_fmt->round(arg, true), refUp);
//! Test fixture using FloatFormat(-126, 127, 23, true). Assuming the
//! constructor's leading parameters are minExp, maxExp, fractionBits and
//! exactPrecision (TODO confirm), this is a format with exponents -126..127,
//! 23 fraction bits and exact precision.
353 class TestBinary32 : public Test
357 : Test (MovePtr<FloatFormat>(new FloatFormat(-126, 127, 23, true))) {}
// Runs the checks for this format.
359 void runTest (void) const;
//! Exercise ulp(), round() and floatToHex() of the format held in m_fmt.
362 void TestBinary32::runTest (void) const
// ULP checks: an exact power of two reports the spacing below it
// (ulp(p(0)) == p(-24)), and the expected ULP never drops below p(-149).
364 testULP(p(0), p(-24));
365 testULP(p(0) + p(-23), p(-23));
366 testULP(p(-124), p(-148));
367 testULP(p(-125), p(-149));
368 testULP(p(-125) + p(-140), p(-148));
369 testULP(p(-126), p(-149));
370 testULP(p(-130), p(-149));
// Rounding checks: (argument, expected round-down, expected round-up).
372 testRound(p(0) + p(-20) + p(-40), p(0) + p(-20), p(0) + p(-20) + p(-23));
373 testRound(p(-126) - p(-150), p(-126) - p(-149), p(-126));
// Hex formatting checks, including a value below p(-126) that is expected
// to print with a leading 0 and exponent -126.
375 TCU_CHECK(m_fmt->floatToHex(p(0)) == "0x1.000000p0");
376 TCU_CHECK(m_fmt->floatToHex(p(8) + p(-4)) == "0x1.001000p8");
377 TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126");
// NOTE(review): the next check repeats the previous one verbatim.
378 TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126");
379 TCU_CHECK(m_fmt->floatToHex(p(-126) + p(-125)) == "0x1.800000p-125");
384 void FloatFormat_selfTest (void)