From de1ed28c2f1ca789d5d167339150da676700d4bf Mon Sep 17 00:00:00 2001
From: Ruiling Song
Date: Fri, 10 Jan 2014 13:39:41 +0800
Subject: [PATCH] GBE: Improve atan precision

Signed-off-by: Ruiling Song
Tested-by: Zhigang Gong
---
 backend/src/ocl_stdlib.tmpl.h | 85 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 68 insertions(+), 17 deletions(-)

diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 24613cd..ecbca20 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -2330,24 +2330,75 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_acospi(float x) {
   return __gen_ocl_internal_acos(x) / M_PI_F;
 }
 INLINE_OVERLOADABLE float __gen_ocl_internal_atan(float x) {
-  float a = 0, c = 1;
-  if (x <= -1) {
-    a = - M_PI_2_F;
-    x = 1 / x;
-    c = -1;
-  }
-  if (x >= 1) {
-    a = M_PI_2_F;
-    x = 1 / x;
-    c = -1;
-  }
-  a += c*x;
-  int i;
-  int sign;
-  for(i=3, sign=-1; i<63; i+=2, sign=-sign) {
-    a += c*sign*__gen_ocl_pow(x,i)/i;
+  /* copied from fdlibm: reduce |x| to a small argument, evaluate a polynomial in it, then add back the tabulated atan of the chosen breakpoint (hi + lo parts) */
+  float atanhi[4]; /* leading parts of atan(0.5), atan(1.0), atan(1.5), atan(inf) */
+  atanhi[0] = 4.6364760399e-01; /* atan(0.5)hi 0x3eed6338 */
+  atanhi[1] = 7.8539812565e-01; /* atan(1.0)hi 0x3f490fda */
+  atanhi[2] = 9.8279368877e-01; /* atan(1.5)hi 0x3f7b985e */
+  atanhi[3] = 1.5707962513e+00; /* atan(inf)hi 0x3fc90fda */
+
+  float atanlo[4]; /* low-order parts paired index-by-index with atanhi */
+  atanlo[0] = 5.0121582440e-09; /* atan(0.5)lo 0x31ac3769 */
+  atanlo[1] = 3.7748947079e-08; /* atan(1.0)lo 0x33222168 */
+  atanlo[2] = 3.4473217170e-08; /* atan(1.5)lo 0x33140fb4 */
+  atanlo[3] = 7.5497894159e-08; /* atan(inf)lo 0x33a22168 */
+
+  float aT[11]; /* polynomial coefficients: even indices feed s1, odd indices feed s2 below */
+  aT[0] = 3.3333334327e-01; /* 0x3eaaaaaa */
+  aT[1] = -2.0000000298e-01; /* 0xbe4ccccd */
+  aT[2] = 1.4285714924e-01; /* 0x3e124925 */
+  aT[3] = -1.1111110449e-01; /* 0xbde38e38 */
+  aT[4] = 9.0908870101e-02; /* 0x3dba2e6e */
+  aT[5] = -7.6918758452e-02; /* 0xbd9d8795 */
+  aT[6] = 6.6610731184e-02; /* 0x3d886b35 */
+  aT[7] = -5.8335702866e-02; /* 0xbd6ef16b */
+  aT[8] = 4.9768779427e-02; /* 0x3d4bda59 */
+  aT[9] = -3.6531571299e-02; /* 0xbd15a221 */
+  aT[10] = 1.6285819933e-02; /* 0x3c8569d7 */
+  const float one = 1.0, huge = 1.0e30; /* huge is only used by the inexact-raising test below */
+
+  float w,s1,s2,z;
+  int ix,hx,id; /* hx: word filled from x, ix: hx without its sign bit, id: table index or -1 */
+
+  GEN_OCL_GET_FLOAT_WORD(hx,x); /* macro defined elsewhere; presumably stores the raw bit pattern of x in hx (TODO confirm) */
+  ix = hx&0x7fffffff; /* mask off the sign bit so the comparisons below act on |x| */
+  if(ix>=0x50800000) { /* if |x| >= 2^34 */
+    if(ix>0x7f800000)
+      return x+x; /* NaN */
+    if(hx>0) return atanhi[3]+atanlo[3]; /* positive: the atan(inf) table entry */
+    else return -atanhi[3]-atanlo[3]; /* otherwise: its negation */
+  } if (ix < 0x3ee00000) { /* |x| < 0.4375 */
+    if (ix < 0x31000000) { /* |x| < 2^-29 */
+      if(huge+x>one) return x; /* raise inexact */
+    }
+    id = -1; /* no table entry: the polynomial is applied to x itself */
+  } else {
+    x = __gen_ocl_fabs(x); /* reduce on |x|; the sign is reapplied from hx at the end */
+    if (ix < 0x3f980000) { /* |x| < 1.1875 */
+      if (ix < 0x3f300000) { /* 7/16 <=|x|<11/16 */
+        id = 0; x = ((float)2.0*x-one)/((float)2.0+x);
+      } else { /* 11/16<=|x|< 19/16 */
+        id = 1; x = (x-one)/(x+one);
+      }
+    } else {
+      if (ix < 0x401c0000) { /* |x| < 2.4375 */
+        id = 2; x = (x-(float)1.5)/(one+(float)1.5*x);
+      } else { /* 2.4375 <= |x| < 2^34 (larger |x| already returned above) */
+        id = 3; x = -(float)1.0/x;
+      }
+    }} /* closes the |x| >= 1.1875 branch and the enclosing else */
+  /* end of argument reduction */
+  z = x*x;
+  w = z*z;
+  /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
+  s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
+  s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
+  if (id<0) return x - x*(s1+s2); /* small |x|: x still carries its own sign here */
+  else {
+    z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x); /* table value (hi, then lo) combined with the reduced-argument result */
+    return (hx<0)? -z:z; /* restore the sign of the original argument */
   }
-  return a;
+
 }
 INLINE_OVERLOADABLE float __gen_ocl_internal_atanpi(float x) {
   return __gen_ocl_internal_atan(x) / M_PI_F;
-- 
2.7.4