#define dstTSIZE ((int)sizeof(dstT1)*3)
#endif
+// Absolute-value helpers used by the FUNC / PROCESS_ELEMS / REDUCE_GLOBAL macros.
+// When ddepth <= 4 the abs()/abs_diff() built-ins are used and their result is
+// passed through convertToDT; otherwise fabs() is applied directly.
+// NOTE(review): presumably ddepth <= 4 selects the integer depths - confirm against the host code.
+#if ddepth <= 4
+#define SUM_ABS(a) convertToDT(abs(a))
+#define SUM_ABS2(a, b) convertToDT(abs_diff(a, b))
+#else
+#define SUM_ABS(a) fabs(a)
+#define SUM_ABS2(a, b) fabs((a) - (b))
+#endif
+
#ifdef HAVE_MASK
#ifdef HAVE_SRC2
#define EXTRA_PARAMS , __global const uchar * mask, int mask_step, int mask_offset, __global const uchar * src2ptr, int src2_step, int src2_offset
#define FUNC(a, b) a += b
#elif defined OP_SUM_ABS
-#define FUNC(a, b) a += b >= (dstT)(0) ? b : -b
+#define FUNC(a, b) a += SUM_ABS(b)
#elif defined OP_SUM_SQR
#if ddepth <= 4
#define PROCESS_ELEMS /* load one value from each source, accumulate |src - src2| and |src2| */ \
dstT temp = convertToDT(loadpix(srcptr + src_index)); \
dstT temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
- temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
- temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
+ temp = SUM_ABS2(temp, temp2); /* absolute difference of the two loaded values */ \
+ temp2 = SUM_ABS(temp2); /* absolute value of the src2 value */ \
FUNC(accumulator2, temp2); \
FUNC(accumulator, temp)
#else
#define PROCESS_ELEMS /* load one value from each source, accumulate |src - src2| only */ \
dstT temp = convertToDT(loadpix(srcptr + src_index)); \
dstT temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
- temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
+ temp = SUM_ABS2(temp, temp2); /* absolute difference of the two loaded values */ \
FUNC(accumulator, temp)
#endif
#else
#define REDUCE_GLOBAL /* load one value from each source, accumulate |src - src2| and |src2| */ \
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
- temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
- temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
+ temp = SUM_ABS2(temp, temp2); /* absolute difference of the two loaded values */ \
+ temp2 = SUM_ABS(temp2); /* absolute value of the src2 value */ \
FUNC(accumulator, temp); \
FUNC(accumulator2, temp2)
#elif kercn == 2
#define REDUCE_GLOBAL \
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
- temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
- temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
+ temp = SUM_ABS2(temp, temp2); \
+ temp2 = SUM_ABS(temp2); \
FUNC(accumulator, temp.s0); \
FUNC(accumulator, temp.s1); \
FUNC(accumulator2, temp2.s0); \
#define REDUCE_GLOBAL \
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
- temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
- temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
+ temp = SUM_ABS2(temp, temp2); \
+ temp2 = SUM_ABS(temp2); \
FUNC(accumulator, temp.s0); \
FUNC(accumulator, temp.s1); \
FUNC(accumulator, temp.s2); \
#define REDUCE_GLOBAL \
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
- temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
- temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
+ temp = SUM_ABS2(temp, temp2); \
+ temp2 = SUM_ABS(temp2); \
FUNC(accumulator, temp.s0); \
FUNC(accumulator, temp.s1); \
FUNC(accumulator, temp.s2); \
#define REDUCE_GLOBAL \
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
- temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
- temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; \
+ temp = SUM_ABS2(temp, temp2); \
+ temp2 = SUM_ABS(temp2); \
FUNC(accumulator, temp.s0); \
FUNC(accumulator, temp.s1); \
FUNC(accumulator, temp.s2); \
#define REDUCE_GLOBAL /* load one value from each source and accumulate |src - src2| */ \
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
- temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
+ temp = SUM_ABS2(temp, temp2); /* absolute difference of the two loaded values */ \
FUNC(accumulator, temp)
#elif kercn == 2
#define REDUCE_GLOBAL /* kercn == 2: accumulate both lanes of |src - src2| into accumulator */ \
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
- temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
+ temp = SUM_ABS2(temp, temp2); /* absolute difference of the two loaded values */ \
FUNC(accumulator, temp.s0); \
FUNC(accumulator, temp.s1)
#elif kercn == 4
#define REDUCE_GLOBAL \
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
- temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
+ temp = SUM_ABS2(temp, temp2); \
FUNC(accumulator, temp.s0); \
FUNC(accumulator, temp.s1); \
FUNC(accumulator, temp.s2); \
#define REDUCE_GLOBAL \
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
- temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
+ temp = SUM_ABS2(temp, temp2); /* absolute difference of the two loaded values */ \
FUNC(accumulator, temp.s0); \
FUNC(accumulator, temp.s1); \
FUNC(accumulator, temp.s2); \
#define REDUCE_GLOBAL \
dstTK temp = convertToDT(loadpix(srcptr + src_index)); \
dstTK temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
- temp = temp > temp2 ? temp - temp2 : (temp2 - temp); \
+ temp = SUM_ABS2(temp, temp2); \
FUNC(accumulator, temp.s0); \
FUNC(accumulator, temp.s1); \
FUNC(accumulator, temp.s2); \
char cvt[40];
String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
" -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
- " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s",
+ " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d",
depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs,
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned,
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
ocl::convertTypeStr(depth, ddepth, kercn, cvt), absValues ? " -D OP_ABS" : "",
haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "",
- haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "");
+ haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth);
ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
if (k.empty())