From 316c044e0653a21320136572f2f4bccff7e12525 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 11 Jun 2014 18:54:43 +0400 Subject: [PATCH] used abs in reduction operations --- modules/core/src/opencl/minmaxloc.cl | 14 ++++++++--- modules/core/src/opencl/reduce.cl | 46 +++++++++++++++++++++--------------- modules/core/src/stat.cpp | 4 ++-- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/modules/core/src/opencl/minmaxloc.cl b/modules/core/src/opencl/minmaxloc.cl index eb57347..ca708a8 100644 --- a/modules/core/src/opencl/minmaxloc.cl +++ b/modules/core/src/opencl/minmaxloc.cl @@ -39,6 +39,14 @@ #define noconvert #define INDEX_MAX UINT_MAX +#if wdepth <= 4 +#define MIN_ABS(a) convertToDT(abs(a)) +#define MIN_ABS2(a, b) convertToDT(abs_diff(a, b)) +#else +#define MIN_ABS(a) fabs(a) +#define MIN_ABS2(a, b) fabs(a - b) +#endif + #if kercn != 3 #define loadpix(addr) *(__global const srcT *)(addr) #define srcTSIZE (int)sizeof(srcT) @@ -182,7 +190,7 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off #endif temp = convertToDT(loadpix(srcptr + src_index)); #ifdef OP_ABS - temp = temp >= (dstT)(0) ? temp : -temp; + temp = MIN_ABS(temp); #endif #ifdef HAVE_SRC2 @@ -192,9 +200,9 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off src2_index = mad24(id / cols, src2_step, mul24(id % cols, srcTSIZE)); #endif temp2 = convertToDT(loadpix(src2ptr + src2_index)); - temp = temp > temp2 ? temp - temp2 : (temp2 - temp); + temp = MIN_ABS2(temp, temp2); #ifdef OP_CALC2 - temp2 = temp2 >= (dstT)(0) ? 
temp2 : -temp2; + temp2 = MIN_ABS(temp2); #endif #endif diff --git a/modules/core/src/opencl/reduce.cl b/modules/core/src/opencl/reduce.cl index 888b5df..8c5193f 100644 --- a/modules/core/src/opencl/reduce.cl +++ b/modules/core/src/opencl/reduce.cl @@ -108,6 +108,14 @@ #define dstTSIZE ((int)sizeof(dstT1)*3) #endif +#if ddepth <= 4 +#define SUM_ABS(a) convertToDT(abs(a)) +#define SUM_ABS2(a, b) convertToDT(abs_diff(a, b)) +#else +#define SUM_ABS(a) fabs(a) +#define SUM_ABS2(a, b) fabs(a - b) +#endif + #ifdef HAVE_MASK #ifdef HAVE_SRC2 #define EXTRA_PARAMS , __global const uchar * mask, int mask_step, int mask_offset, __global const uchar * src2ptr, int src2_step, int src2_offset @@ -136,7 +144,7 @@ #define FUNC(a, b) a += b #elif defined OP_SUM_ABS -#define FUNC(a, b) a += b >= (dstT)(0) ? b : -b +#define FUNC(a, b) a += SUM_ABS(b) #elif defined OP_SUM_SQR #if ddepth <= 4 @@ -163,15 +171,15 @@ #define PROCESS_ELEMS \ dstT temp = convertToDT(loadpix(srcptr + src_index)); \ dstT temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \ - temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \ + temp = SUM_ABS2(temp, temp2); \ + temp2 = SUM_ABS(temp2); \ FUNC(accumulator2, temp2); \ FUNC(accumulator, temp) #else #define PROCESS_ELEMS \ dstT temp = convertToDT(loadpix(srcptr + src_index)); \ dstT temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \ + temp = SUM_ABS2(temp, temp2); \ FUNC(accumulator, temp) #endif #else @@ -255,16 +263,16 @@ #define REDUCE_GLOBAL \ dstTK temp = convertToDT(loadpix(srcptr + src_index)); \ dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \ - temp2 = temp2 >= (dstT)(0) ? 
temp2 : -temp2; \ + temp = SUM_ABS2(temp, temp2); \ + temp2 = SUM_ABS(temp2); \ FUNC(accumulator, temp); \ FUNC(accumulator2, temp2) #elif kercn == 2 #define REDUCE_GLOBAL \ dstTK temp = convertToDT(loadpix(srcptr + src_index)); \ dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \ - temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \ + temp = SUM_ABS2(temp, temp2); \ + temp2 = SUM_ABS(temp2); \ FUNC(accumulator, temp.s0); \ FUNC(accumulator, temp.s1); \ FUNC(accumulator2, temp2.s0); \ @@ -273,8 +281,8 @@ #define REDUCE_GLOBAL \ dstTK temp = convertToDT(loadpix(srcptr + src_index)); \ dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \ - temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \ + temp = SUM_ABS2(temp, temp2); \ + temp2 = SUM_ABS(temp2); \ FUNC(accumulator, temp.s0); \ FUNC(accumulator, temp.s1); \ FUNC(accumulator, temp.s2); \ @@ -287,8 +295,8 @@ #define REDUCE_GLOBAL \ dstTK temp = convertToDT(loadpix(srcptr + src_index)); \ dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \ - temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \ + temp = SUM_ABS2(temp, temp2); \ + temp2 = SUM_ABS(temp2); \ FUNC(accumulator, temp.s0); \ FUNC(accumulator, temp.s1); \ FUNC(accumulator, temp.s2); \ @@ -309,8 +317,8 @@ #define REDUCE_GLOBAL \ dstTK temp = convertToDT(loadpix(srcptr + src_index)); \ dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \ - temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \ + temp = SUM_ABS2(temp, temp2); \ + temp2 = SUM_ABS(temp2); \ FUNC(accumulator, temp.s0); \ FUNC(accumulator, temp.s1); \ FUNC(accumulator, temp.s2); \ @@ -349,20 +357,20 @@ #define REDUCE_GLOBAL \ dstTK temp = convertToDT(loadpix(srcptr + src_index)); \ dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = temp > temp2 ? 
temp - temp2 : (temp2 - temp); \ + temp = SUM_ABS2(temp, temp2); \ FUNC(accumulator, temp) #elif kercn == 2 #define REDUCE_GLOBAL \ dstTK temp = convertToDT(loadpix(srcptr + src_index)); \ dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \ + temp = SUM_ABS2(temp, temp2); \ FUNC(accumulator, temp.s0); \ FUNC(accumulator, temp.s1) #elif kercn == 4 #define REDUCE_GLOBAL \ dstTK temp = convertToDT(loadpix(srcptr + src_index)); \ dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \ + temp = SUM_ABS2(temp, temp2); \ FUNC(accumulator, temp.s0); \ FUNC(accumulator, temp.s1); \ FUNC(accumulator, temp.s2); \ @@ -371,7 +379,7 @@ #define REDUCE_GLOBAL \ dstTK temp = convertToDT(loadpix(srcptr + src_index)); \ dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \ + temp = SUM_ABS2(temp, temp2); \ FUNC(accumulator, temp.s0); \ FUNC(accumulator, temp.s1); \ FUNC(accumulator, temp.s2); \ @@ -384,7 +392,7 @@ #define REDUCE_GLOBAL \ dstTK temp = convertToDT(loadpix(srcptr + src_index)); \ dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \ - temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \ + temp = SUM_ABS2(temp, temp2); \ FUNC(accumulator, temp.s0); \ FUNC(accumulator, temp.s1); \ FUNC(accumulator, temp.s2); \ diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 79da3c6..eb13247 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -1471,7 +1471,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* char cvt[40]; String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s" " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s" - " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s", + " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d", depth, ocl::typeToStr(depth), haveMask ? 
" -D HAVE_MASK" : "", (int)wgs, ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", @@ -1482,7 +1482,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), ocl::convertTypeStr(depth, ddepth, kercn, cvt), absValues ? " -D OP_ABS" : "", haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "", - haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : ""); + haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth); ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts); if (k.empty()) -- 2.7.4