}
}
+/*--------------------------------------------------------------------*//*!
+ * \brief Round the given number `val` to nearest even by discarding
+ *        the last `numBitsToDiscard` bits.
+ *
+ * `val` is treated as a fixed-point number whose lowest
+ * `numBitsToDiscard` bits form the fraction. The fraction is dropped and
+ * the remaining integer part is rounded to nearest, with ties going to
+ * the even neighbour. The result can therefore be one larger than
+ * `val >> numBitsToDiscard`.
+ *
+ * \param val              value to round
+ * \param numBitsToDiscard number of (least significant) bits to discard;
+ *                         must be in the range [1, 31]
+ * \return The rounded value with the last `numBitsToDiscard` removed
+ *//*--------------------------------------------------------------------*/
+static deUint32 roundToNearestEven (deUint32 val, const deUint32 numBitsToDiscard)
+{
+	deUint32 lastBits;	/* All of the bits that are going to be discarded. */
+	deUint32 headBit;	/* Most significant discarded bit, i.e. the "one half" bit. */
+
+	/* Check the shift count before it is used: 0 would wrap `numBitsToDiscard - 1`
+	 * around, and 32 or more is not a valid shift count for a 32-bit value.
+	 */
+	DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 32);
+
+	/* Unsigned literals are required here: `1 << 31` on a signed int is undefined. */
+	lastBits = val & ((1u << numBitsToDiscard) - 1u);
+	headBit = val & (1u << (numBitsToDiscard - 1u));
+	val >>= numBitsToDiscard;
+
+	if (headBit == 0)
+	{
+		/* Discarded part is below one half: round down. */
+		return val;
+	}
+	else if (headBit == lastBits)
+	{
+		/* Discarded part is exactly one half: tie, pick the even neighbour. */
+		if ((val & 0x1) == 0x1)
+		{
+			return val + 1;
+		}
+		else
+		{
+			return val;
+		}
+	}
+	else
+	{
+		/* Discarded part is above one half: round up. */
+		return val + 1;
+	}
+}
+
+/*--------------------------------------------------------------------*//*!
+ * \brief Convert a 32-bit float to a 16-bit float using the given
+ *        rounding mode.
+ *
+ * Only DE_ROUNDINGMODE_TO_ZERO and DE_ROUNDINGMODE_TO_NEAREST_EVEN are
+ * supported (asserted). Zero and denormalized 32-bit inputs become a
+ * signed zero, Inf stays Inf, and NaN stays NaN (the truncated payload is
+ * forced to be non-zero).
+ *
+ * \param val32 32-bit float value to convert
+ * \param mode  rounding mode applied when precision or range is lost
+ * \return The 16-bit float bit pattern
+ *//*--------------------------------------------------------------------*/
+deFloat16 deFloat32To16Round (float val32, deRoundingMode mode)
+{
+	union
+	{
+		float		f;	/* View as 32-bit float */
+		deUint32	u;	/* View as 32-bit unsigned integer */
+	} bits;
+	deUint32	signBit;	/* Sign moved to bit 15, where the 16-bit format keeps it */
+	deUint32	biased32;	/* Biased exponent of the 32-bit input */
+	deUint32	fraction;	/* 23-bit mantissa field of the 32-bit input */
+	int			biased16;	/* Exponent re-biased for 16-bit floats; may be out of range */
+	const int	towardZero	= (mode == DE_ROUNDINGMODE_TO_ZERO);
+
+	/* Only these two rounding modes are handled. */
+	DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN);
+
+	bits.f		= val32;
+	signBit		= (bits.u >> 16u) & 0x00008000u;
+	biased32	= (bits.u >> 23u) & 0x000000ffu;
+	fraction	= bits.u & 0x007fffffu;
+	biased16	= (int) (biased32) - 127 + 15; /* 127/15: exponent bias of 32-bit/16-bit floats */
+
+	/* Zero and denormalized 32-bit floats: magnitude is below 2^(1-127), which is far
+	 * smaller than anything a 16-bit float can hold, so the result is a signed zero.
+	 */
+	if (biased32 == 0)
+	{
+		return (deFloat16) signBit;
+	}
+
+	/* Inf and NaN. This must be tested before the range checks on biased16 below. */
+	if (biased32 == 0x000000ffu)
+	{
+		deUint32 payload;
+
+		if (fraction == 0u)
+		{
+			return (deFloat16) (signBit | 0x7c00u); /* Inf */
+		}
+
+		/* NaN: keep the top 10 mantissa bits (the 16-bit mantissa is 13 bits shorter).
+		 * If they are all zero, set the lowest bit so the result does not become Inf.
+		 */
+		payload = fraction >> 13u;
+		if (payload == 0u)
+		{
+			payload = 1u;
+		}
+		return (deFloat16) (signBit | 0x7c00u | payload);
+	}
+
+	/* From here on the input is a normalized 32-bit float. */
+
+	/* Magnitude is below 2^-25, i.e. less than half of the smallest 16-bit
+	 * denormal (2^-24): not representable, the result is a signed zero.
+	 */
+	if (biased16 < -10)
+	{
+		return (deFloat16) signBit;
+	}
+
+	/* Result is zero or a denormalized 16-bit float. */
+	if (biased16 <= 0)
+	{
+		/* Make the implicit leading 1 explicit, giving a (23 + 1)-bit mantissa. Going
+		 * down to a 10-bit mantissa drops 14 bits, and the exponent underflow is
+		 * encoded by shifting right a further -biased16 bits.
+		 */
+		const deUint32	wide	= fraction | 0x00800000u;
+		const deUint32	drop	= (deUint32) (14 - biased16);
+		/* When rounding to nearest, the result may reach 0x400; that carry lands in
+		 * the exponent field and yields the smallest normalized 16-bit float.
+		 */
+		const deUint32	narrow	= towardZero ? (wide >> drop) : roundToNearestEven(wide, drop);
+
+		return (deFloat16) (signBit | narrow);
+	}
+
+	/* Result is a normalized 16-bit float. */
+	if (biased16 <= 30)
+	{
+		if (towardZero)
+		{
+			return (deFloat16) (signBit | ((deUint32) biased16 << 10u) | (fraction >> 13u));
+		}
+		else
+		{
+			const deUint32	rounded		= roundToNearestEven(fraction, 13);
+			/* A mantissa that rounded up to 0x400 carries into the exponent field.
+			 * The exponent may thereby become 31 (Inf), which is the correct result.
+			 */
+			const deUint32	expField	= ((deUint32) biased16 << 10u) + (rounded & 0x400u);
+
+			return (deFloat16) (signBit | expField | (rounded & 0x3ffu));
+		}
+	}
+
+	/* Too large for a 16-bit float. According to IEEE Std 754-2008 Section 7.4:
+	 *  * roundTiesToEven carries all overflows to Inf with the sign of the
+	 *    intermediate result.
+	 *  * roundTowardZero carries all overflows to the format's largest finite
+	 *    number with the sign of the intermediate result.
+	 */
+	return (deFloat16) (signBit | (towardZero ? 0x7bffu /* 111 1011 1111 1111 */ : 0x7c00u /* Inf */));
+}
+
float deFloat16To32 (deFloat16 val16)
{
deUint32 sign;