Extras/CDTestFramework/Opcode/Ice/IceFPU.h

   1 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
   2 /**
   3  *      Contains FPU related code.
   4  *      \file           IceFPU.h
   5  *      \author         Pierre Terdiman
   6  *      \date           April, 4, 2000
   7  */
   8 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
   9
  10 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  11 // Include Guard
  12 #ifndef ICEFPU_H
  13 #define ICEFPU_H
  14
  15         #define SIGN_BITMASK                    0x80000000
  16
  17         //! Integer representation of a floating-point value.
  18         #define IR(x)                                   ((udword&)(x))
  19
  20         //! Signed integer representation of a floating-point value.
  21         #define SIR(x)                                  ((sdword&)(x))
  22
  23         //! Absolute integer representation of a floating-point value
  24         #define AIR(x)                                  (IR(x)&0x7fffffff)
  25
  26         //! Floating-point representation of an integer value.
  27         #define FR(x)                                   ((float&)(x))
  28
  29         //! Integer-based comparison of a floating point value.
  30         //! Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
  31         #define IS_NEGATIVE_FLOAT(x)    (IR(x)&0x80000000)
  32
  33         //! Checks 2 values have different signs
  34         inline_ BOOL DifferentSign(float f0, float f1)
  35         {
  36                 return (IR(f0)^IR(f1))&SIGN_BITMASK;
  37         }
  38
  39         //! Fast fabs for floating-point values. It just clears the sign bit.
  40         //! Don't use it blindy, it can be faster or slower than the FPU comparison, depends on the context.
  41         inline_ float FastFabs(float x)
  42         {
  43                 udword FloatBits = IR(x)&0x7fffffff;
  44                 return FR(FloatBits);
  45         }
  46
  47         //! Fast square root for floating-point values.
  48         inline_ float FastSqrt(float square)
  49         {
  50                         float retval;
  51
  52                         __asm {
  53                                         mov             eax, square
  54                                         sub             eax, 0x3F800000
  55                                         sar             eax, 1
  56                                         add             eax, 0x3F800000
  57                                         mov             [retval], eax
  58                         }
  59                         return retval;
  60         }
  61
  62         //! Saturates positive to zero.
  63         inline_ float fsat(float f)
  64         {
  65                 udword y = (udword&)f & ~((sdword&)f >>31);
  66                 return (float&)y;
  67         }
  68
  69         //! Computes 1.0f / sqrtf(x).
  70         inline_ float frsqrt(float f)
  71         {
  72                 float x = f * 0.5f;
  73                 udword y = 0x5f3759df - ((udword&)f >> 1);
  74                 // Iteration...
  75                 (float&)y  = (float&)y * ( 1.5f - ( x * (float&)y * (float&)y ) );
  76                 // Result
  77                 return (float&)y;
  78         }
  79
  80         //! Computes 1.0f / sqrtf(x). Comes from NVIDIA.
  81         inline_ float InvSqrt(const float& x)
  82         {
  83                 udword tmp = (udword(IEEE_1_0 << 1) + IEEE_1_0 - *(udword*)&x) >> 1;
  84                 float y = *(float*)&tmp;
  85                 return y * (1.47f - 0.47f * x * y * y);
  86         }
  87
  88         //! Computes 1.0f / sqrtf(x). Comes from Quake3. Looks like the first one I had above.
  89         //! See http://www.magic-software.com/3DGEDInvSqrt.html
  90         inline_ float RSqrt(float number)
  91         {
  92                 long i;
  93                 float x2, y;
  94                 const float threehalfs = 1.5f;
  95
  96                 x2 = number * 0.5f;
  97                 y  = number;
  98                 i  = * (long *) &y;
  99                 i  = 0x5f3759df - (i >> 1);
 100                 y  = * (float *) &i;
 101                 y  = y * (threehalfs - (x2 * y * y));
 102
 103                 return y;
 104         }
 105
 106         //! TO BE DOCUMENTED
 107         inline_ float fsqrt(float f)
 108         {
 109                 udword y = ( ( (sdword&)f - 0x3f800000 ) >> 1 ) + 0x3f800000;
 110                 // Iteration...?
 111                 // (float&)y = (3.0f - ((float&)y * (float&)y) / f) * (float&)y * 0.5f;
 112                 // Result
 113                 return (float&)y;
 114         }
 115
 116         //! Returns the float ranged espilon value.
 117         inline_ float fepsilon(float f)
 118         {
 119                 udword b = (udword&)f & 0xff800000;
 120                 udword a = b | 0x00000001;
 121                 (float&)a -= (float&)b;
 122                 // Result
 123                 return (float&)a;
 124         }
 125
 126         //! Is the float valid ?
 127         inline_ bool IsNAN(float value)                         { return (IR(value)&0x7f800000) == 0x7f800000;  }
 128         inline_ bool IsIndeterminate(float value)       { return IR(value) == 0xffc00000;                               }
 129         inline_ bool IsPlusInf(float value)                     { return IR(value) == 0x7f800000;                               }
 130         inline_ bool IsMinusInf(float value)            { return IR(value) == 0xff800000;                               }
 131
 132         inline_ bool IsValidFloat(float value)
 133         {
 134                 if(IsNAN(value))                        return false;
 135                 if(IsIndeterminate(value))      return false;
 136                 if(IsPlusInf(value))            return false;
 137                 if(IsMinusInf(value))           return false;
 138                 return true;
 139         }
 140
 141         #define CHECK_VALID_FLOAT(x)    ASSERT(IsValidFloat(x));
 142
 143 /*
 144         //! FPU precision setting function.
 145         inline_ void SetFPU()
 146         {
 147                 // This function evaluates whether the floating-point
 148                 // control word is set to single precision/round to nearest/
 149                 // exceptions disabled. If these conditions don't hold, the
 150                 // function changes the control word to set them and returns
 151                 // TRUE, putting the old control word value in the passback
 152                 // location pointed to by pwOldCW.
 153                 {
 154                         uword wTemp, wSave;
 155
 156                         __asm fstcw wSave
 157                         if (wSave & 0x300 ||            // Not single mode
 158                                 0x3f != (wSave & 0x3f) ||   // Exceptions enabled
 159                                 wSave & 0xC00)              // Not round to nearest mode
 160                         {
 161                                 __asm
 162                                 {
 163                                         mov ax, wSave
 164                                         and ax, not 300h    ;; single mode
 165                                         or  ax, 3fh         ;; disable all exceptions
 166                                         and ax, not 0xC00   ;; round to nearest mode
 167                                         mov wTemp, ax
 168                                         fldcw   wTemp
 169                                 }
 170                         }
 171                 }
 172         }
 173 */
 174         //! This function computes the slowest possible floating-point value (you can also directly use FLT_EPSILON)
 175         inline_ float ComputeFloatEpsilon()
 176         {
 177                 float f = 1.0f;
 178                 ((udword&)f)^=1;
 179                 return f - 1.0f;        // You can check it's the same as FLT_EPSILON
 180         }
 181
 182         inline_ bool IsFloatZero(float x, float epsilon=1e-6f)
 183         {
 184                 return x*x < epsilon;
 185         }
 186
 187         #define FCOMI_ST0       _asm    _emit   0xdb    _asm    _emit   0xf0
 188         #define FCOMIP_ST0      _asm    _emit   0xdf    _asm    _emit   0xf0
 189         #define FCMOVB_ST0      _asm    _emit   0xda    _asm    _emit   0xc0
 190         #define FCMOVNB_ST0     _asm    _emit   0xdb    _asm    _emit   0xc0
 191
 192         #define FCOMI_ST1       _asm    _emit   0xdb    _asm    _emit   0xf1
 193         #define FCOMIP_ST1      _asm    _emit   0xdf    _asm    _emit   0xf1
 194         #define FCMOVB_ST1      _asm    _emit   0xda    _asm    _emit   0xc1
 195         #define FCMOVNB_ST1     _asm    _emit   0xdb    _asm    _emit   0xc1
 196
 197         #define FCOMI_ST2       _asm    _emit   0xdb    _asm    _emit   0xf2
 198         #define FCOMIP_ST2      _asm    _emit   0xdf    _asm    _emit   0xf2
 199         #define FCMOVB_ST2      _asm    _emit   0xda    _asm    _emit   0xc2
 200         #define FCMOVNB_ST2     _asm    _emit   0xdb    _asm    _emit   0xc2
 201
 202         #define FCOMI_ST3       _asm    _emit   0xdb    _asm    _emit   0xf3
 203         #define FCOMIP_ST3      _asm    _emit   0xdf    _asm    _emit   0xf3
 204         #define FCMOVB_ST3      _asm    _emit   0xda    _asm    _emit   0xc3
 205         #define FCMOVNB_ST3     _asm    _emit   0xdb    _asm    _emit   0xc3
 206
 207         #define FCOMI_ST4       _asm    _emit   0xdb    _asm    _emit   0xf4
 208         #define FCOMIP_ST4      _asm    _emit   0xdf    _asm    _emit   0xf4
 209         #define FCMOVB_ST4      _asm    _emit   0xda    _asm    _emit   0xc4
 210         #define FCMOVNB_ST4     _asm    _emit   0xdb    _asm    _emit   0xc4
 211
 212         #define FCOMI_ST5       _asm    _emit   0xdb    _asm    _emit   0xf5
 213         #define FCOMIP_ST5      _asm    _emit   0xdf    _asm    _emit   0xf5
 214         #define FCMOVB_ST5      _asm    _emit   0xda    _asm    _emit   0xc5
 215         #define FCMOVNB_ST5     _asm    _emit   0xdb    _asm    _emit   0xc5
 216
 217         #define FCOMI_ST6       _asm    _emit   0xdb    _asm    _emit   0xf6
 218         #define FCOMIP_ST6      _asm    _emit   0xdf    _asm    _emit   0xf6
 219         #define FCMOVB_ST6      _asm    _emit   0xda    _asm    _emit   0xc6
 220         #define FCMOVNB_ST6     _asm    _emit   0xdb    _asm    _emit   0xc6
 221
 222         #define FCOMI_ST7       _asm    _emit   0xdb    _asm    _emit   0xf7
 223         #define FCOMIP_ST7      _asm    _emit   0xdf    _asm    _emit   0xf7
 224         #define FCMOVB_ST7      _asm    _emit   0xda    _asm    _emit   0xc7
 225         #define FCMOVNB_ST7     _asm    _emit   0xdb    _asm    _emit   0xc7
 226
 227         //! A global function to find MAX(a,b) using FCOMI/FCMOV
 228         inline_ float FCMax2(float a, float b)
 229         {
 230                 float Res;
 231                 _asm    fld             [a]
 232                 _asm    fld             [b]
 233                 FCOMI_ST1
 234                 FCMOVB_ST1
 235                 _asm    fstp    [Res]
 236                 _asm    fcomp
 237                 return Res;
 238         }
 239
 240         //! A global function to find MIN(a,b) using FCOMI/FCMOV
 241         inline_ float FCMin2(float a, float b)
 242         {
 243                 float Res;
 244                 _asm    fld             [a]
 245                 _asm    fld             [b]
 246                 FCOMI_ST1
 247                 FCMOVNB_ST1
 248                 _asm    fstp    [Res]
 249                 _asm    fcomp
 250                 return Res;
 251         }
 252
 253         //! A global function to find MAX(a,b,c) using FCOMI/FCMOV
 254         inline_ float FCMax3(float a, float b, float c)
 255         {
 256                 float Res;
 257                 _asm    fld             [a]
 258                 _asm    fld             [b]
 259                 _asm    fld             [c]
 260                 FCOMI_ST1
 261                 FCMOVB_ST1
 262                 FCOMI_ST2
 263                 FCMOVB_ST2
 264                 _asm    fstp    [Res]
 265                 _asm    fcompp
 266                 return Res;
 267         }
 268
 269         //! A global function to find MIN(a,b,c) using FCOMI/FCMOV
 270         inline_ float FCMin3(float a, float b, float c)
 271         {
 272                 float Res;
 273                 _asm    fld             [a]
 274                 _asm    fld             [b]
 275                 _asm    fld             [c]
 276                 FCOMI_ST1
 277                 FCMOVNB_ST1
 278                 FCOMI_ST2
 279                 FCMOVNB_ST2
 280                 _asm    fstp    [Res]
 281                 _asm    fcompp
 282                 return Res;
 283         }
 284
 285         //! A global function to find MAX(a,b,c,d) using FCOMI/FCMOV
 286         inline_ float FCMax4(float a, float b, float c, float d)
 287         {
 288                 float Res;
 289                 _asm    fld             [a]
 290                 _asm    fld             [b]
 291                 _asm    fld             [c]
 292                 _asm    fld             [d]
 293                 FCOMI_ST1
 294                 FCMOVB_ST1
 295                 FCOMI_ST2
 296                 FCMOVB_ST2
 297                 FCOMI_ST3
 298                 FCMOVB_ST3
 299                 _asm    fstp    [Res]
 300                 _asm    fcompp
 301                 _asm    fcomp
 302                 return Res;
 303         }
 304
 305         //! A global function to find MIN(a,b,c,d) using FCOMI/FCMOV
 306         inline_ float FCMin4(float a, float b, float c, float d)
 307         {
 308                 float Res;
 309                 _asm    fld             [a]
 310                 _asm    fld             [b]
 311                 _asm    fld             [c]
 312                 _asm    fld             [d]
 313                 FCOMI_ST1
 314                 FCMOVNB_ST1
 315                 FCOMI_ST2
 316                 FCMOVNB_ST2
 317                 FCOMI_ST3
 318                 FCMOVNB_ST3
 319                 _asm    fstp    [Res]
 320                 _asm    fcompp
 321                 _asm    fcomp
 322                 return Res;
 323         }
 324
 325         inline_ int ConvertToSortable(float f)
 326         {
 327                 int& Fi = (int&)f;
 328                 int Fmask = (Fi>>31);
 329                 Fi ^= Fmask;
 330                 Fmask &= ~(1<<31);
 331                 Fi -= Fmask;
 332                 return Fi;
 333         }
 334
 335         inline_ udword EncodeFloat(const float val)
 336         {
 337                 // We may need to check on -0 and 0
 338                 // But it should make no practical difference.
 339                 udword ir = IR(val);
 340
 341                 if(ir & 0x80000000) //negative?
 342                         ir = ~ir;//reverse sequence of negative numbers
 343                 else
 344                         ir |= 0x80000000; // flip sign
 345
 346                 return ir;
 347         }
 348
 349         inline_ float DecodeFloat(udword ir)
 350         {
 351                 udword rv;
 352
 353                 if(ir & 0x80000000) //positive?
 354                         rv = ir & ~0x80000000; //flip sign
 355                 else
 356                         rv = ~ir; //undo reversal
 357
 358                 return FR(rv);
 359         }
 360
 361         enum FPUMode
 362         {
 363                 FPU_FLOOR               = 0,
 364                 FPU_CEIL                = 1,
 365                 FPU_BEST                = 2,
 366
 367                 FPU_FORCE_DWORD = 0x7fffffff
 368         };
 369
        // Exported FPU control functions. Only the declarations are visible in this header;
        // NOTE(review): the behaviour described below is inferred from the names - confirm against the implementation.

        // Query / save / restore / select the current FPU mode (see enum FPUMode).
        FUNCTION ICECORE_API FPUMode    GetFPUMode();
        FUNCTION ICECORE_API void               SaveFPU();
        FUNCTION ICECORE_API void               RestoreFPU();
        FUNCTION ICECORE_API void               SetFPUFloorMode();
        FUNCTION ICECORE_API void               SetFPUCeilMode();
        FUNCTION ICECORE_API void               SetFPUBestMode();

        // Precision selection (presumably 24 / 53 / 64 mantissa bits) and rounding selection.
        FUNCTION ICECORE_API void               SetFPUPrecision24();
        FUNCTION ICECORE_API void               SetFPUPrecision53();
        FUNCTION ICECORE_API void               SetFPUPrecision64();
        FUNCTION ICECORE_API void               SetFPURoundingChop();
        FUNCTION ICECORE_API void               SetFPURoundingUp();
        FUNCTION ICECORE_API void               SetFPURoundingDown();
        FUNCTION ICECORE_API void               SetFPURoundingNear();

        // Float-to-int conversions; the float is passed by const reference.
        FUNCTION ICECORE_API int                intChop(const float& f);
        FUNCTION ICECORE_API int                intFloor(const float& f);
        FUNCTION ICECORE_API int                intCeil(const float& f);
 388
 389         inline_ sdword MyFloor(float f)
 390         {
 391                 return (sdword)f - (IR(f)>>31);
 392         }
 393
        //! Scope guard built around one FPU control word.
        //! Only the declaration is visible here; the constructor and destructor are defined elsewhere.
        //! NOTE(review): presumably the constructor saves the current control word into mControlWord and the
        //! destructor restores it - confirm against the implementation.
        class ICECORE_API FPUGuard
        {
                public:
                                FPUGuard();
                                ~FPUGuard();
                private:
                uword   mControlWord;   // control word held for the lifetime of the guard
        };
 402
 403 #endif // ICEFPU_H