From bfa6135c9e7a53d2aab21c8c3257e85eb61b1212 Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Fri, 29 Aug 2014 10:04:38 +0800 Subject: [PATCH] GBE: fix error in the rootn fastpath function for some special input. The fastpath trades some accuracy for speed, but it must not produce wrong results. rootn has many special-case inputs that need to be handled before we call the native pow directly. This patch fixes all the pow-related failures in the OpenCV 3.0 test suite. Signed-off-by: Zhigang Gong Reviewed-by: "Song, Ruiling" --- backend/src/ocl_stdlib.tmpl.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 01bcbef..2e37513 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -3731,7 +3731,9 @@ INLINE_OVERLOADABLE float pown(float x, int n) { return 1; return powr(x, n); } -INLINE_OVERLOADABLE float rootn(float x, int n) { + +INLINE_OVERLOADABLE float internal_rootn(float x, int n, const bool isFastpath) +{ float ax,re; int sign = 0; if( n == 0 )return NAN; @@ -3758,12 +3760,19 @@ INLINE_OVERLOADABLE float rootn(float x, int n) { ax = __gen_ocl_fabs(x); if(x <0.0f && (n&1)) sign = 1; - re = __gen_ocl_internal_pow(ax,1.f/n); + if (isFastpath) + re = __gen_ocl_pow(ax,1.f/n); + else + re = __gen_ocl_internal_pow(ax,1.f/n); if(sign) re = -re; return re; } +INLINE_OVERLOADABLE float rootn(float x, int n) { + return internal_rootn(x, n, 0); +} + ///////////////////////////////////////////////////////////////////////////// // Geometric functions (see 6.11.5 of OCL 1.1 spec) ///////////////////////////////////////////////////////////////////////////// @@ -5082,7 +5091,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_remainder (float x, float INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_rootn(float x, int n) { - return __gen_ocl_pow(x, 1.f / n); + return internal_rootn(x, n, 1); } INLINE_OVERLOADABLE float 
__gen_ocl_internal_fastpath_sin (float x) -- 2.7.4