1 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
3 * Contains FPU related code.
5 * \author Pierre Terdiman
8 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
10 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//! Mask with only the top bit of a 32-bit word set; used in this file to isolate a float's sign bit.
15 #define SIGN_BITMASK 0x80000000
17 //! Integer representation of a floating-point value.
//! Expands to a udword reference onto x, so the bits are reinterpreted rather than converted.
//! NOTE(review): the reference cast presumably requires x to be an lvalue of the same size as udword - confirm.
18 #define IR(x) ((udword&)(x))
20 //! Signed integer representation of a floating-point value.
//! Same reinterpretation as IR(), but through an sdword reference.
21 #define SIR(x) ((sdword&)(x))
23 //! Absolute integer representation of a floating-point value
//! i.e. IR(x) with the top (sign) bit masked off.
24 #define AIR(x) (IR(x)&0x7fffffff)
26 //! Floating-point representation of an integer value.
//! Expands to a float reference onto x; the inverse reinterpretation of IR().
27 #define FR(x) ((float&)(x))
//! Sign test performed on the integer representation of a float.
//! Evaluates to a non-zero udword when the sign bit of x is set (this includes -0.0f), zero otherwise.
//! Not guaranteed to beat a regular FPU comparison: measure in context before relying on it.
#define IS_NEGATIVE_FLOAT(x)	(IR(x)&SIGN_BITMASK)
33 //! Checks 2 values have different signs
34 inline_ BOOL DifferentSign(float f0, float f1)
36 return (IR(f0)^IR(f1))&SIGN_BITMASK;
39 //! Fast fabs for floating-point values. It just clears the sign bit.
40 //! Don't use it blindly, it can be faster or slower than the FPU's own fabs, depends on the context.
41 inline_ float FastFabs(float x)
43 udword FloatBits = IR(x)&0x7fffffff;
47 //! Fast square root for floating-point values.
48 inline_ float FastSqrt(float square)
62 //! Saturates positive to zero.
63 inline_ float fsat(float f)
65 udword y = (udword&)f & ~((sdword&)f >>31);
69 //! Computes 1.0f / sqrtf(x).
70 inline_ float frsqrt(float f)
73 udword y = 0x5f3759df - ((udword&)f >> 1);
75 (float&)y = (float&)y * ( 1.5f - ( x * (float&)y * (float&)y ) );
80 //! Computes 1.0f / sqrtf(x). Comes from NVIDIA.
81 inline_ float InvSqrt(const float& x)
83 udword tmp = (udword(IEEE_1_0 << 1) + IEEE_1_0 - *(udword*)&x) >> 1;
84 float y = *(float*)&tmp;
85 return y * (1.47f - 0.47f * x * y * y);
88 //! Computes 1.0f / sqrtf(x). Comes from Quake3. Looks like the first one I had above.
89 //! See http://www.magic-software.com/3DGEDInvSqrt.html
90 inline_ float RSqrt(float number)
94 const float threehalfs = 1.5f;
99 i = 0x5f3759df - (i >> 1);
101 y = y * (threehalfs - (x2 * y * y));
107 inline_ float fsqrt(float f)
109 udword y = ( ( (sdword&)f - 0x3f800000 ) >> 1 ) + 0x3f800000;
111 // (float&)y = (3.0f - ((float&)y * (float&)y) / f) * (float&)y * 0.5f;
116 //! Returns the float ranged epsilon value.
117 inline_ float fepsilon(float f)
119 udword b = (udword&)f & 0xff800000;
120 udword a = b | 0x00000001;
121 (float&)a -= (float&)b;
126 //! Is the float valid ?
127 inline_ bool IsNAN(float value) { return (IR(value)&0x7f800000) == 0x7f800000; }
128 inline_ bool IsIndeterminate(float value) { return IR(value) == 0xffc00000; }
129 inline_ bool IsPlusInf(float value) { return IR(value) == 0x7f800000; }
130 inline_ bool IsMinusInf(float value) { return IR(value) == 0xff800000; }
132 inline_ bool IsValidFloat(float value)
134 if(IsNAN(value)) return false;
135 if(IsIndeterminate(value)) return false;
136 if(IsPlusInf(value)) return false;
137 if(IsMinusInf(value)) return false;
//! Asserts (through ASSERT) that x passes IsValidFloat().
//! NOTE(review): the expansion already ends with ';', so writing "CHECK_VALID_FLOAT(x);" produces an extra
//! empty statement - this matters in an unbraced if/else. Confirm call sites before changing it.
141 #define CHECK_VALID_FLOAT(x) ASSERT(IsValidFloat(x));
144 //! FPU precision setting function.
145 inline_ void SetFPU()
147 // This function evaluates whether the floating-point
148 // control word is set to single precision/round to nearest/
149 // exceptions disabled. If these conditions don't hold, the
150 // function changes the control word to set them and returns
151 // TRUE, putting the old control word value in the passback
152 // location pointed to by pwOldCW.
157 if (wSave & 0x300 || // Not single mode
158 0x3f != (wSave & 0x3f) || // Exceptions enabled
159 wSave & 0xC00) // Not round to nearest mode
164 and ax, not 300h ;; single mode
165 or ax, 3fh ;; disable all exceptions
166 and ax, not 0xC00 ;; round to nearest mode
174 //! This function computes the machine epsilon for floats (you can also directly use FLT_EPSILON)
175 inline_ float ComputeFloatEpsilon()
179 return f - 1.0f; // You can check it's the same as FLT_EPSILON
182 inline_ bool IsFloatZero(float x, float epsilon=1e-6f)
184 return x*x < epsilon;
//! Raw byte encodings of the FCOMI / FCOMIP / FCMOVB / FCMOVNB instructions, one group per FPU stack register (ST0..ST7).
//! Each macro emits two bytes with "_asm _emit": an opcode byte (0xdb, 0xdf or 0xda) followed by a second byte
//! whose low hex digit is the register index (0xf0..0xf7 for the compares, 0xc0..0xc7 for the conditional moves).
//! NOTE(review): presumably emitted by hand because the inline assembler in use lacked these mnemonics - confirm.
187 #define FCOMI_ST0 _asm _emit 0xdb _asm _emit 0xf0
188 #define FCOMIP_ST0 _asm _emit 0xdf _asm _emit 0xf0
189 #define FCMOVB_ST0 _asm _emit 0xda _asm _emit 0xc0
190 #define FCMOVNB_ST0 _asm _emit 0xdb _asm _emit 0xc0
192 #define FCOMI_ST1 _asm _emit 0xdb _asm _emit 0xf1
193 #define FCOMIP_ST1 _asm _emit 0xdf _asm _emit 0xf1
194 #define FCMOVB_ST1 _asm _emit 0xda _asm _emit 0xc1
195 #define FCMOVNB_ST1 _asm _emit 0xdb _asm _emit 0xc1
197 #define FCOMI_ST2 _asm _emit 0xdb _asm _emit 0xf2
198 #define FCOMIP_ST2 _asm _emit 0xdf _asm _emit 0xf2
199 #define FCMOVB_ST2 _asm _emit 0xda _asm _emit 0xc2
200 #define FCMOVNB_ST2 _asm _emit 0xdb _asm _emit 0xc2
202 #define FCOMI_ST3 _asm _emit 0xdb _asm _emit 0xf3
203 #define FCOMIP_ST3 _asm _emit 0xdf _asm _emit 0xf3
204 #define FCMOVB_ST3 _asm _emit 0xda _asm _emit 0xc3
205 #define FCMOVNB_ST3 _asm _emit 0xdb _asm _emit 0xc3
207 #define FCOMI_ST4 _asm _emit 0xdb _asm _emit 0xf4
208 #define FCOMIP_ST4 _asm _emit 0xdf _asm _emit 0xf4
209 #define FCMOVB_ST4 _asm _emit 0xda _asm _emit 0xc4
210 #define FCMOVNB_ST4 _asm _emit 0xdb _asm _emit 0xc4
212 #define FCOMI_ST5 _asm _emit 0xdb _asm _emit 0xf5
213 #define FCOMIP_ST5 _asm _emit 0xdf _asm _emit 0xf5
214 #define FCMOVB_ST5 _asm _emit 0xda _asm _emit 0xc5
215 #define FCMOVNB_ST5 _asm _emit 0xdb _asm _emit 0xc5
217 #define FCOMI_ST6 _asm _emit 0xdb _asm _emit 0xf6
218 #define FCOMIP_ST6 _asm _emit 0xdf _asm _emit 0xf6
219 #define FCMOVB_ST6 _asm _emit 0xda _asm _emit 0xc6
220 #define FCMOVNB_ST6 _asm _emit 0xdb _asm _emit 0xc6
222 #define FCOMI_ST7 _asm _emit 0xdb _asm _emit 0xf7
223 #define FCOMIP_ST7 _asm _emit 0xdf _asm _emit 0xf7
224 #define FCMOVB_ST7 _asm _emit 0xda _asm _emit 0xc7
225 #define FCMOVNB_ST7 _asm _emit 0xdb _asm _emit 0xc7
227 //! A global function to find MAX(a,b) using FCOMI/FCMOV
228 inline_ float FCMax2(float a, float b)
240 //! A global function to find MIN(a,b) using FCOMI/FCMOV
241 inline_ float FCMin2(float a, float b)
253 //! A global function to find MAX(a,b,c) using FCOMI/FCMOV
254 inline_ float FCMax3(float a, float b, float c)
269 //! A global function to find MIN(a,b,c) using FCOMI/FCMOV
270 inline_ float FCMin3(float a, float b, float c)
285 //! A global function to find MAX(a,b,c,d) using FCOMI/FCMOV
286 inline_ float FCMax4(float a, float b, float c, float d)
305 //! A global function to find MIN(a,b,c,d) using FCOMI/FCMOV
306 inline_ float FCMin4(float a, float b, float c, float d)
325 inline_ int ConvertToSortable(float f)
328 int Fmask = (Fi>>31);
335 inline_ udword EncodeFloat(const float val)
337 // We may need to check on -0 and 0
338 // But it should make no practical difference.
341 if(ir & 0x80000000) //negative?
342 ir = ~ir;//reverse sequence of negative numbers
344 ir |= 0x80000000; // flip sign
349 inline_ float DecodeFloat(udword ir)
353 if(ir & 0x80000000) //positive?
354 rv = ir & ~0x80000000; //flip sign
356 rv = ~ir; //undo reversal
367 FPU_FORCE_DWORD = 0x7fffffff
// Exported FPU state helpers. Only the prototypes appear here.
// NOTE(review): behaviour is inferred from the names alone - check the implementation before relying on it.
370 FUNCTION ICECORE_API FPUMode GetFPUMode();
371 FUNCTION ICECORE_API void SaveFPU();
372 FUNCTION ICECORE_API void RestoreFPU();
373 FUNCTION ICECORE_API void SetFPUFloorMode();
374 FUNCTION ICECORE_API void SetFPUCeilMode();
375 FUNCTION ICECORE_API void SetFPUBestMode();
// Precision and rounding setters; presumably each one rewrites the FPU control word - TODO confirm.
377 FUNCTION ICECORE_API void SetFPUPrecision24();
378 FUNCTION ICECORE_API void SetFPUPrecision53();
379 FUNCTION ICECORE_API void SetFPUPrecision64();
380 FUNCTION ICECORE_API void SetFPURoundingChop();
381 FUNCTION ICECORE_API void SetFPURoundingUp();
382 FUNCTION ICECORE_API void SetFPURoundingDown();
383 FUNCTION ICECORE_API void SetFPURoundingNear();
// Float-to-int conversions taking the float by const reference; the names suggest truncating,
// floor and ceiling rounding respectively - confirm against the implementation.
385 FUNCTION ICECORE_API int intChop(const float& f);
386 FUNCTION ICECORE_API int intFloor(const float& f);
387 FUNCTION ICECORE_API int intCeil(const float& f);
389 inline_ sdword MyFloor(float f)
391 return (sdword)f - (IR(f)>>31);
394 class ICECORE_API FPUGuard