From fa95d68378b7ae03d91fad41e3ec02733253445b Mon Sep 17 00:00:00 2001 From: Ruiling Song Date: Fri, 10 Jan 2014 13:39:42 +0800 Subject: [PATCH] GBE: Improve precision of atan2 Signed-off-by: Ruiling Song Tested-by: Zhigang Gong --- backend/src/ocl_stdlib.tmpl.h | 95 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 78 insertions(+), 17 deletions(-) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index ecbca20..ca53ab8 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -2414,25 +2414,86 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_erfc(float x) { #define sqrt native_sqrt INLINE_OVERLOADABLE float rsqrt(float x) { return native_rsqrt(x); } INLINE_OVERLOADABLE float __gen_ocl_internal_atan2(float y, float x) { - uint hx = *(uint *)(&x), ix = hx & 0x7FFFFFFF; - uint hy = *(uint *)(&y), iy = hy & 0x7FFFFFFF; - if (ix > 0x7F800000 || iy > 0x7F800000) - return nan(0u); - if (ix == 0) { - if (y > 0) - return M_PI_2_F; - if (y < 0) - return - M_PI_2_F; - return nan(0u); - } else { - float z = __gen_ocl_internal_atan(y / x); - if (x > 0) - return z; - if (y >= 0) - return M_PI_F + z; - return - M_PI_F + z; + /* copied from fdlibm */ + float z; + int k,m,hx,hy,ix,iy; + const float + tiny = 1.0e-30, + zero = 0.0, + pi_o_4 = 7.8539818525e-01, /* 0x3f490fdb */ + pi_o_2 = 1.5707963705e+00, /* 0x3fc90fdb */ + pi = 3.1415927410e+00, /* 0x40490fdb */ + pi_lo = -8.7422776573e-08; /* 0xb3bbbd2e */ + + GEN_OCL_GET_FLOAT_WORD(hx,x); + ix = hx&0x7fffffff; + GEN_OCL_GET_FLOAT_WORD(hy,y); + iy = hy&0x7fffffff; + + if((ix>0x7f800000)|| + (iy>0x7f800000)) /* x or y is NaN */ + return x+y; + if(hx==0x3f800000) return z=__gen_ocl_internal_atan(y); /* x=1.0 */ + m = ((hy>>31)&1)|((hx>>30)&2); /* 2*sign(x)+sign(y) */ + + /* when y = 0 */ + if(iy==0) { + switch(m) { + case 0: + case 1: return y; /* atan(+-0,+anything)=+-0 */ + case 2: return pi+tiny;/* atan(+0,-anything) = pi */ + case 3: return -pi-tiny;/* atan(-0,-anything) =-pi */ + } + } + /* when x = 0 */ + if(ix==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny; + + /* both are denorms. Gen does not support denorm, so we convert to normal float number*/ + if(ix <= 0x7fffff && iy <= 0x7fffff) { + x = (float)(ix) * (1.0f - ((hx>>30) & 0x2)); + y = (float)(iy) * (1.0f - ((hy>>30) & 0x2)); + } + + /* when x is INF */ + if(ix==0x7f800000) { + if(iy==0x7f800000) { + switch(m) { + case 0: return pi_o_4+tiny;/* atan(+INF,+INF) */ + case 1: return -pi_o_4-tiny;/* atan(-INF,+INF) */ + case 2: return (float)3.0*pi_o_4+tiny;/*atan(+INF,-INF)*/ + case 3: return (float)-3.0*pi_o_4-tiny;/*atan(-INF,-INF)*/ + } + } else { + switch(m) { + case 0: return zero ; /* atan(+...,+INF) */ + case 1: return -zero ; /* atan(-...,+INF) */ + case 2: return pi+tiny ; /* atan(+...,-INF) */ + case 3: return -pi-tiny ; /* atan(-...,-INF) */ + } + } + } + /* when y is INF */ + if(iy==0x7f800000) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny; + + /* compute y/x */ + k = (iy-ix)>>23; + if(k > 60) z=pi_o_2+(float)0.5*pi_lo; /* |y/x| > 2**60 */ + else if(hx<0&&k<-60) z=0.0; /* |y|/x < -2**60 */ + else z=__gen_ocl_internal_atan(__gen_ocl_fabs(y/x)); /* safe to do y/x */ + switch (m) { + case 0: return z ; /* atan(+,+) */ + case 1: { + uint zh; + GEN_OCL_GET_FLOAT_WORD(zh,z); + GEN_OCL_SET_FLOAT_WORD(z,zh ^ 0x80000000); + } + return z ; /* atan(-,+) */ + case 2: return pi-(z-pi_lo);/* atan(+,-) */ + default: /* case 3 */ + return (z-pi_lo)-pi;/* atan(-,-) */ } } + INLINE_OVERLOADABLE float __gen_ocl_internal_atan2pi(float y, float x) { uint ix = as_uint(x), iy = as_uint(y), pos_zero = 0, neg_zero = 0x80000000u, -- 2.7.4