From: Homer Hsing
Date: Thu, 29 Aug 2013 05:41:24 +0000 (+0800)
Subject: add built-in function "atan2"
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d02d0ebca3bc8d05eb9784ffaf1019d805a98790;p=contrib%2Fbeignet.git

add built-in function "atan2"

also improve the accuracy of built-in function "atan"
also add a test case

Signed-off-by: Homer Hsing
Reviewed-by: "Yang, Rong R"
---
Review notes (not part of the commit message):
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch. Two-space indentation is assumed; verify the context
   lines against the tree before applying.
 * The first #include of utests/builtin_atan2.cpp had lost its header name.
   <cmath> is assumed because the test calls atan2f/fabsf; please confirm.
 * atan2 with both arguments zero now returns the C99 Annex F values
   (+/-0 when x is +0, +/-pi when x is -0) instead of NaN.
 * The unit-test comparison is written so that a NaN result fails the test
   (the old ">=" comparison was false for NaN and passed silently).
 * Dropped the stray ';' after the kernel body in kernels/builtin_atan2.cl.
 * The "index" blob ids are those of the original commit and do not describe
   the revised post-images.

diff --git a/backend/src/builtin_vector_proto.def b/backend/src/builtin_vector_proto.def
index 37206a6..23ed8d3 100644
--- a/backend/src/builtin_vector_proto.def
+++ b/backend/src/builtin_vector_proto.def
@@ -6,8 +6,7 @@ gentype asin (gentype)
 gentype asinh (gentype)
 gentype asinpi (gentype x)
 gentype atan (gentype y_over_x)
-# XXX atan2 is a builtin function
-#gentype atan2 (gentype y, gentype x)
+gentype atan2 (gentype y, gentype x)
 gentype atanh (gentype)
 gentype atanpi (gentype x)
 #gentype atan2pi (gentype y, gentype x)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 939d5be..b76f5c0 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -1331,7 +1331,13 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_atan(float x) {
     x = 1 / x;
     c = -1;
   }
-  return a + c * (x - __gen_ocl_pow(x, 3) / 3 + __gen_ocl_pow(x, 5) / 5 - __gen_ocl_pow(x, 7) / 7 + __gen_ocl_pow(x, 9) / 9 - __gen_ocl_pow(x, 11) / 11);
+  a += c*x;
+  int i;
+  int sign;
+  for(i=3, sign=-1; i<63; i+=2, sign=-sign) {
+    a += c*sign*__gen_ocl_pow(x,i)/i;
+  }
+  return a;
 }
 INLINE_OVERLOADABLE float __gen_ocl_internal_atanpi(float x) {
   return __gen_ocl_internal_atan(x) / M_PI_F;
@@ -1358,6 +1364,26 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_erfc(float x) {
 // XXX work-around PTX profile
 #define sqrt native_sqrt
 INLINE_OVERLOADABLE float rsqrt(float x) { return native_rsqrt(x); }
+INLINE_OVERLOADABLE float __gen_ocl_internal_atan2(float y, float x) {
+  uint hx = *(uint *)(&x), ix = hx & 0x7FFFFFFF;
+  uint hy = *(uint *)(&y), iy = hy & 0x7FFFFFFF;
+  if (ix > 0x7F800000 || iy > 0x7F800000)
+    return nan(0u);
+  if (ix == 0) {
+    if (y > 0)
+      return M_PI_2_F;
+    if (y < 0)
+      return - M_PI_2_F;
+    return (hx >> 31) ? ((hy >> 31) ? - M_PI_F : M_PI_F) : y; // y is +/-0 here: +/-pi for x == -0, +/-0 for x == +0
+  } else {
+    float z = __gen_ocl_internal_atan(y / x);
+    if (x > 0)
+      return z;
+    if (y >= 0)
+      return M_PI_F + z;
+    return - M_PI_F + z;
+  }
+}
 INLINE_OVERLOADABLE float __gen_ocl_internal_fabs(float x)  { return __gen_ocl_fabs(x); }
 INLINE_OVERLOADABLE float __gen_ocl_internal_trunc(float x) { return __gen_ocl_rndz(x); }
 INLINE_OVERLOADABLE float __gen_ocl_internal_round(float x) { return __gen_ocl_rnde(x); }
@@ -1390,6 +1416,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) {
 #define tanpi __gen_ocl_internal_tanpi
 #define tanh __gen_ocl_internal_tanh
 #define atan __gen_ocl_internal_atan
+#define atan2 __gen_ocl_internal_atan2
 #define atanpi __gen_ocl_internal_atanpi
 #define atanh __gen_ocl_internal_atanh
 #define pow powr
diff --git a/kernels/builtin_atan2.cl b/kernels/builtin_atan2.cl
new file mode 100644
index 0000000..aba73be
--- /dev/null
+++ b/kernels/builtin_atan2.cl
@@ -0,0 +1,4 @@
+kernel void builtin_atan2(global float *y, global float *x, global float *dst) {
+  int i = get_global_id(0);
+  dst[i] = atan2(y[i], x[i]);
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index cd4ca16..ffabc39 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -104,6 +104,7 @@ set (utests_sources
   compiler_vector_load_store.cpp
   compiler_cl_finish.cpp
   get_cl_info.cpp
+  builtin_atan2.cpp
   builtin_bitselect.cpp
   builtin_frexp.cpp
   builtin_mad_sat.cpp
diff --git a/utests/builtin_atan2.cpp b/utests/builtin_atan2.cpp
new file mode 100644
index 0000000..29dd7b4
--- /dev/null
+++ b/utests/builtin_atan2.cpp
@@ -0,0 +1,43 @@
+#include <cmath>
+#include "utest_helper.hpp"
+
+void builtin_atan2(void) {
+  const int n = 1024;
+  float y[n], x[n];
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("builtin_atan2");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  for (int i = 0; i < n; ++i) {
+    y[i] = ((float*) buf_data[0])[i] = (rand()&255) * 0.01f;
+    x[i] = ((float*) buf_data[1])[i] = (rand()&255) * 0.01f;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+
+  OCL_NDRANGE(1);
+
+  OCL_MAP_BUFFER(2);
+  float *dst = (float*) buf_data[2];
+  for (int i = 0; i < n; ++i) {
+    float cpu = atan2f(y[i], x[i]);
+    float gpu = dst[i];
+    if (!(fabsf(cpu - gpu) < 1e-2)) { // negated "<" so that a NaN result is reported too
+      printf("%f %f %f %f\n", y[i], x[i], cpu, gpu);
+      OCL_ASSERT(0);
+    }
+  }
+  OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION (builtin_atan2);