framework/common/tcuFloat.hpp

   1 #ifndef _TCUFLOAT_HPP
   2 #define _TCUFLOAT_HPP
   3 /*-------------------------------------------------------------------------
   4  * drawElements Quality Program Tester Core
   5  * ----------------------------------------
   6  *
   7  * Copyright 2014 The Android Open Source Project
   8  *
   9  * Licensed under the Apache License, Version 2.0 (the "License");
  10  * you may not use this file except in compliance with the License.
  11  * You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  *
  21  *//*!
  22  * \file
  23  * \brief Reconfigurable floating-point value template.
  24  *//*--------------------------------------------------------------------*/
  25
  26 #include "tcuDefs.hpp"
  27
  28 // For memcpy().
  29 #include <string.h>
  30
  31 namespace tcu
  32 {
  33
  34 enum FloatFlags
  35 {
  36         FLOAT_HAS_SIGN                  = (1<<0),
  37         FLOAT_SUPPORT_DENORM    = (1<<1)
  38 };
  39
  40 /*--------------------------------------------------------------------*//*!
  41  * \brief Floating-point format template
  42  *
  43  * This template implements arbitrary floating-point handling. Template
  44  * can be used for conversion between different formats and checking
  45  * various properties of floating-point values.
  46  *//*--------------------------------------------------------------------*/
  47 template <typename StorageType_, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
  48 class Float
  49 {
  50 public:
  51         typedef StorageType_ StorageType;
  52
  53         enum
  54         {
  55                 EXPONENT_BITS   = ExponentBits,
  56                 MANTISSA_BITS   = MantissaBits,
  57                 EXPONENT_BIAS   = ExponentBias,
  58                 FLAGS                   = Flags,
  59         };
  60
  61                                                         Float                   (void);
  62         explicit                                Float                   (StorageType value);
  63         explicit                                Float                   (float v);
  64         explicit                                Float                   (double v);
  65
  66         template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
  67         static Float                    convert                 (const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& src);
  68
  69         static inline Float             convert                 (const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& src) { return src; }
  70
  71         /*--------------------------------------------------------------------*//*!
  72          * \brief Construct floating point value
  73          * \param sign          Sign. Must be +1/-1
  74          * \param exponent      Exponent in range [1-ExponentBias, ExponentBias+1]
  75          * \param mantissa      Mantissa bits with implicit leading bit explicitly set
  76          * \return The specified float
  77          *
  78          * This function constructs a floating point value from its inputs.
  79          * The normally implicit leading bit of the mantissa must be explicitly set.
  80          * The exponent normally used for zero/subnormals is an invalid input. Such
  81          * values are specified with the leading mantissa bit of zero and the lowest
  82          * normal exponent (1-ExponentBias). Additionally having both exponent and
  83          * mantissa set to zero is a shorthand notation for the correctly signed
  84          * floating point zero. Inf and NaN must be specified directly with an
  85          * exponent of ExponentBias+1 and the appropriate mantissa (with leading
  86          * bit set)
  87          *//*--------------------------------------------------------------------*/
  88         static inline Float             construct               (int sign, int exponent, StorageType mantissa);
  89
  90         /*--------------------------------------------------------------------*//*!
  91          * \brief Construct floating point value. Explicit version
  92          * \param sign          Sign. Must be +1/-1
  93          * \param exponent      Exponent in range [-ExponentBias, ExponentBias+1]
  94          * \param mantissa      Mantissa bits
  95          * \return The specified float
  96          *
  97          * This function constructs a floating point value from its inputs with
  98          * minimal intervention.
  99          * The sign is turned into a sign bit and the exponent bias is added.
 100          * See IEEE-754 for additional information on the inputs and
 101          * the encoding of special values.
 102          *//*--------------------------------------------------------------------*/
 103         static Float                    constructBits   (int sign, int exponent, StorageType mantissaBits);
 104
 105         StorageType                             bits                    (void) const    { return m_value;                                                                                                                       }
 106         float                                   asFloat                 (void) const;
 107         double                                  asDouble                (void) const;
 108
 109         inline int                              signBit                 (void) const    { return (int)(m_value >> (ExponentBits+MantissaBits)) & 1;                                     }
 110         inline StorageType              exponentBits    (void) const    { return (m_value >> MantissaBits) & ((StorageType(1)<<ExponentBits)-1);        }
 111         inline StorageType              mantissaBits    (void) const    { return m_value & ((StorageType(1)<<MantissaBits)-1);                                          }
 112
 113         inline int                              sign                    (void) const    { return signBit() ? -1 : 1;                                                                                                                                                    }
 114         inline int                              exponent                (void) const    { return isDenorm() ? 1 - ExponentBias : (int)exponentBits() - ExponentBias;                                                    }
 115         inline StorageType              mantissa                (void) const    { return isZero() || isDenorm() ? mantissaBits() : (mantissaBits() | (StorageType(1)<<MantissaBits));   }
 116
 117         inline bool                             isInf                   (void) const    { return exponentBits() == ((1<<ExponentBits)-1)        && mantissaBits() == 0; }
 118         inline bool                             isNaN                   (void) const    { return exponentBits() == ((1<<ExponentBits)-1)        && mantissaBits() != 0; }
 119         inline bool                             isZero                  (void) const    { return exponentBits() == 0                                            && mantissaBits() == 0; }
 120         inline bool                             isDenorm                (void) const    { return exponentBits() == 0                                            && mantissaBits() != 0; }
 121
 122         static Float                    zero                    (int sign);
 123         static Float                    inf                             (int sign);
 124         static Float                    nan                             (void);
 125
 126 private:
 127         StorageType                             m_value;
 128 } DE_WARN_UNUSED_TYPE;
 129
 130 // Common floating-point types.
 131 typedef Float<deUint16,  5, 10,   15, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>      Float16;        //!< IEEE 754-2008 16-bit floating-point value
 132 typedef Float<deUint32,  8, 23,  127, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>      Float32;        //!< IEEE 754 32-bit floating-point value
 133 typedef Float<deUint64, 11, 52, 1023, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>      Float64;        //!< IEEE 754 64-bit floating-point value
 134
 135 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
 136 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (void)
 137         : m_value(0)
 138 {
 139 }
 140
 141 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
 142 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (StorageType value)
 143         : m_value(value)
 144 {
 145 }
 146
 147 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
 148 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (float value)
 149         : m_value(0)
 150 {
 151         deUint32 u32;
 152         memcpy(&u32, &value, sizeof(deUint32));
 153         *this = convert(Float32(u32));
 154 }
 155
 156 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
 157 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (double value)
 158         : m_value(0)
 159 {
 160         deUint64 u64;
 161         memcpy(&u64, &value, sizeof(deUint64));
 162         *this = convert(Float64(u64));
 163 }
 164
 165 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
 166 inline float Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asFloat (void) const
 167 {
 168         float           v;
 169         deUint32        u32             = Float32::convert(*this).bits();
 170         memcpy(&v, &u32, sizeof(deUint32));
 171         return v;
 172 }
 173
 174 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
 175 inline double Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asDouble (void) const
 176 {
 177         double          v;
 178         deUint64        u64             = Float64::convert(*this).bits();
 179         memcpy(&v, &u64, sizeof(deUint64));
 180         return v;
 181 }
 182
 183 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
 184 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::zero (int sign)
 185 {
 186         DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
 187         return Float(StorageType((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)));
 188 }
 189
 190 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
 191 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::inf (int sign)
 192 {
 193         DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
 194         return Float(StorageType(((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)) | (((1ull<<ExponentBits)-1) << MantissaBits)));
 195 }
 196
 197 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
 198 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::nan (void)
 199 {
 200         return Float(StorageType((1ull<<(ExponentBits+MantissaBits))-1));
 201 }
 202
 203 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
 204 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
 205 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct
 206         (int sign, int exponent, StorageType mantissa)
 207 {
 208         // Repurpose this otherwise invalid input as a shorthand notation for zero (no need for caller to care about internal representation)
 209         const bool                      isShorthandZero = exponent == 0 && mantissa == 0;
 210
 211         // Handles the typical notation for zero (min exponent, mantissa 0). Note that the exponent usually used exponent (-ExponentBias) for zero/subnormals is not used.
 212         // Instead zero/subnormals have the (normally implicit) leading mantissa bit set to zero.
 213         const bool                      isDenormOrZero  = (exponent == 1 - ExponentBias) && (mantissa >> MantissaBits == 0);
 214         const StorageType       s                               = StorageType((StorageType(sign < 0 ? 1 : 0)) << (StorageType(ExponentBits+MantissaBits)));
 215         const StorageType       exp                             = (isShorthandZero  || isDenormOrZero) ? StorageType(0) : StorageType(exponent + ExponentBias);
 216
 217         DE_ASSERT(sign == +1 || sign == -1);
 218         DE_ASSERT(isShorthandZero || isDenormOrZero || mantissa >> MantissaBits == 1);
 219         DE_ASSERT(exp >> ExponentBits == 0);
 220
 221         return Float(StorageType(s | (exp << MantissaBits) | (mantissa & ((StorageType(1)<<MantissaBits)-1))));
 222 }
 223
 224 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
 225 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
 226 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::constructBits
 227         (int sign, int exponent, StorageType mantissaBits)
 228 {
 229         const StorageType signBit               = sign < 0 ? 1 : 0;
 230         const StorageType exponentBits  = exponent + ExponentBias;
 231
 232         DE_ASSERT(sign == +1 || sign == -1 );
 233         DE_ASSERT(exponentBits >> ExponentBits == 0);
 234         DE_ASSERT(mantissaBits >> MantissaBits == 0);
 235
 236         return Float(StorageType((signBit << (ExponentBits+MantissaBits)) | (exponentBits << MantissaBits) | (mantissaBits)));
 237 }
 238
 239 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
 240 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
 241 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
 242 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::convert
 243         (const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& other)
 244 {
 245         if (!(Flags & FLOAT_HAS_SIGN) && other.sign() < 0)
 246         {
 247                 // Negative number, truncate to zero.
 248                 return zero(+1);
 249         }
 250         else if (other.isInf())
 251         {
 252                 return inf(other.sign());
 253         }
 254         else if (other.isNaN())
 255         {
 256                 return nan();
 257         }
 258         else if (other.isZero())
 259         {
 260                 return zero(other.sign());
 261         }
 262         else
 263         {
 264                 const int                       eMin    = 1 - ExponentBias;
 265                 const int                       eMax    = ((1<<ExponentBits)-2) - ExponentBias;
 266
 267                 const StorageType       s               = StorageType((StorageType(other.signBit())) << (StorageType(ExponentBits+MantissaBits))); // \note Not sign, but sign bit.
 268                 int                                     e               = other.exponent();
 269                 deUint64                        m               = other.mantissa();
 270
 271                 // Normalize denormalized values prior to conversion.
 272                 while (!(m & (1ull<<OtherMantissaBits)))
 273                 {
 274                         m <<= 1;
 275                         e  -= 1;
 276                 }
 277
 278                 if (e < eMin)
 279                 {
 280                         // Underflow.
 281                         if ((Flags & FLOAT_SUPPORT_DENORM) && (eMin-e-1 <= MantissaBits))
 282                         {
 283                                 // Shift and round (RTE).
 284                                 int                     bitDiff = (OtherMantissaBits-MantissaBits) + (eMin-e);
 285                                 deUint64        half    = (1ull << (bitDiff - 1)) - 1;
 286                                 deUint64        bias    = (m >> bitDiff) & 1;
 287
 288                                 return Float(StorageType(s | (m + half + bias) >> bitDiff));
 289                         }
 290                         else
 291                                 return zero(other.sign());
 292                 }
 293                 else
 294                 {
 295                         // Remove leading 1.
 296                         m = m & ~(1ull<<OtherMantissaBits);
 297
 298                         if (MantissaBits < OtherMantissaBits)
 299                         {
 300                                 // Round mantissa (round to nearest even).
 301                                 int                     bitDiff = OtherMantissaBits-MantissaBits;
 302                                 deUint64        half    = (1ull << (bitDiff - 1)) - 1;
 303                                 deUint64        bias    = (m >> bitDiff) & 1;
 304
 305                                 m = (m + half + bias) >> bitDiff;
 306
 307                                 if (m & (1ull<<MantissaBits))
 308                                 {
 309                                         // Overflow in mantissa.
 310                                         m  = 0;
 311                                         e += 1;
 312                                 }
 313                         }
 314                         else
 315                         {
 316                                 int bitDiff = MantissaBits-OtherMantissaBits;
 317                                 m = m << bitDiff;
 318                         }
 319
 320                         if (e > eMax)
 321                         {
 322                                 // Overflow.
 323                                 return inf(other.sign());
 324                         }
 325                         else
 326                         {
 327                                 DE_ASSERT(de::inRange(e, eMin, eMax));
 328                                 DE_ASSERT(((e + ExponentBias) & ~((1ull<<ExponentBits)-1)) == 0);
 329                                 DE_ASSERT((m & ~((1ull<<MantissaBits)-1)) == 0);
 330
 331                                 return Float(StorageType(s | (StorageType(e + ExponentBias) << MantissaBits) | m));
 332                         }
 333                 }
 334         }
 335 }
 336
 337 } // tcu
 338
 339 #endif // _TCUFLOAT_HPP