From 4e476f3eb2111eee8f2ac9304b045dda9962bf0c Mon Sep 17 00:00:00 2001 From: Homer Hsing Date: Fri, 30 Aug 2013 15:24:42 +0800 Subject: [PATCH] improve built-in function "sinpi" "sinpi" was calculated as "sin(pi * x)". But that was not a quite-good way. This patch improved the function, also included a test case. v2: fix compiling warning Signed-off-by: Homer Hsing Reviewed-by: Zhigang Gong --- backend/src/ocl_stdlib.tmpl.h | 56 ++++++++++++++++++++++- kernels/builtin_sinpi.cl | 4 ++ utests/CMakeLists.txt | 1 + utests/builtin_sinpi.cpp | 104 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 kernels/builtin_sinpi.cl create mode 100644 utests/builtin_sinpi.cpp diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index c428fac..a256a8d 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -579,7 +579,61 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_cospi(float x) { } INLINE_OVERLOADABLE float native_sin(float x) { return __gen_ocl_sin(x); } INLINE_OVERLOADABLE float __gen_ocl_internal_sinpi(float x) { - return __gen_ocl_sin(x * M_PI_F); +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + float y, z; + int n, ix; + ix = *(int *) (&x) & 0x7fffffff; + if (ix < 0x3e800000) + return __gen_ocl_sin(M_PI_F * x); + y = -x; + z = __gen_ocl_rndd(y); + if (z != y) { + y *= 0.5f; + y = 2.f * (y - __gen_ocl_rndd(y)); + n = y * 4.f; + } else { + if (ix >= 0x4b800000) { + y = 0; + n = 0; + } else { + if (ix < 0x4b000000) + z = y + 8.3886080000e+06f; + int n = *(int *) (&z); + n &= 1; + y = n; + n <<= 2; + } + } + switch (n) { + case 0: + y = __gen_ocl_sin(M_PI_F * y); + break; + case 1: + case 2: + y = __gen_ocl_cos(M_PI_F * (0.5f - y)); + break; + case 3: + case 4: + y = __gen_ocl_sin(M_PI_F * (1.f - y)); + break; + case 5: + case 6: + y = -__gen_ocl_cos(M_PI_F * (y - 1.5f)); + break; + default: + y = __gen_ocl_sin(M_PI_F * (y - 2.f)); + break; + } + return -y; } INLINE_OVERLOADABLE float native_sqrt(float x) { return __gen_ocl_sqrt(x); } INLINE_OVERLOADABLE float native_rsqrt(float x) { return __gen_ocl_rsqrt(x); } diff --git a/kernels/builtin_sinpi.cl b/kernels/builtin_sinpi.cl new file mode 100644 index 0000000..134152d --- /dev/null +++ b/kernels/builtin_sinpi.cl @@ -0,0 +1,4 @@ +kernel void builtin_sinpi(global float *src, global float *dst) { + int i = get_global_id(0); + dst[i] = sinpi(src[i]); +}; diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 5a152e5..03bf7fd 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -113,6 +113,7 @@ set (utests_sources builtin_shuffle.cpp builtin_shuffle2.cpp builtin_sign.cpp + builtin_sinpi.cpp buildin_work_dim.cpp builtin_global_size.cpp builtin_local_size.cpp diff --git a/utests/builtin_sinpi.cpp b/utests/builtin_sinpi.cpp new file mode 100644 index 0000000..0e11a0d --- /dev/null +++ b/utests/builtin_sinpi.cpp @@ -0,0 +1,104 @@ +#include +#include "utest_helper.hpp" + +static int as_int(float x) { + union {float f; int i;} u; + u.f = x; + return u.i; +} + +static float sinpi(float x) { +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + float y, z; + int n = 0, ix; + const float pi = 3.1415927410e+00f; + + ix = as_int(x) & 0x7fffffff; + + if (ix < 0x3e800000) + return sinf(pi * x); + y = -x; + z = floorf(y); + if (z != y) { + y *= 0.5f; + y = 2.f * (y - floorf(y)); + n = y * 4.f; + } else { + if (ix >= 0x4b800000) { + y = 0; + n = 0; + } else { + if (ix < 0x4b000000) + z = y + 8.3886080000e+06f; + int n = as_int(z); + n &= 1; + y = n; + n <<= 2; + } + } + switch (n) { + case 0: + y = sinf(pi * y); + break; + case 1: + case 2: + y = cosf(pi * ((float) 0.5 - y)); + break; + case 3: + case 4: + y = sinf(pi * (1.f - y)); + break; + case 5: + case 6: + y = -cosf(pi * (y - (float) 1.5)); + break; + default: + y = sinf(pi * (y - (float) 2.0)); + break; + } + return -y; +} + +void builtin_sinpi(void) +{ + const int n = 1024; + float src[n]; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("builtin_sinpi"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + globals[0] = n; + locals[0] = 16; + + for (int j = 0; j < 1000; j ++) { + OCL_MAP_BUFFER(0); + for (int i = 0; i < n; ++i) { + src[i] = ((float*)buf_data[0])[i] = (j*n + i) * 0.01f; + } + OCL_UNMAP_BUFFER(0); + + OCL_NDRANGE(1); + + OCL_MAP_BUFFER(1); + float *dst = (float*)buf_data[1]; + for (int i = 0; i < n; ++i) { + float cpu = sinpi(src[i]); + OCL_ASSERT (fabsf(cpu - dst[i]) < 1e-4); + } + OCL_UNMAP_BUFFER(1); + } +} + +MAKE_UTEST_FROM_FUNCTION(builtin_sinpi); -- 2.7.4