From 0528d2e2b3fb461a1ae81cf3bb02df1e84c038db Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 11 Jun 2014 19:30:10 +0400 Subject: [PATCH] added 32s to 32u conversion --- modules/core/src/arithm.cpp | 6 ++++-- modules/core/src/ocl.cpp | 4 ++-- modules/core/src/opencl/arithm.cl | 16 ++++++++-------- modules/core/src/opencl/minmaxloc.cl | 4 ++-- modules/core/src/opencl/reduce.cl | 4 ++-- modules/core/src/stat.cpp | 19 +++++++++++-------- 6 files changed, 29 insertions(+), 24 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 98d9567..b969ad4 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -1396,7 +1396,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, char cvtstr[4][32], opts[1024]; sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s " "-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s " - "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d -D rowsPerWI=%d", + "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d -D rowsPerWI=%d -D convertFromU=%s", (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)), ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), @@ -1407,7 +1407,9 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]), ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]), ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]), - doubleSupport ? " -D DOUBLE_SUPPORT" : "", kercn, rowsPerWI); + doubleSupport ? " -D DOUBLE_SUPPORT" : "", kercn, rowsPerWI, + oclop == OCL_OP_ABSDIFF && wdepth == CV_32S && ddepth == wdepth ? 
+ ocl::convertTypeStr(CV_8U, ddepth, kercn, cvtstr[3]) : "noconvert"); size_t usrdata_esz = CV_ELEM_SIZE(wdepth); const uchar* usrdata_p = (const uchar*)usrdata; diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index b580df1..24ca6ee 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -4591,7 +4591,7 @@ struct Image2D::Impl CV_OclDbgAssert(err == CL_SUCCESS); size_t origin[] = { 0, 0, 0 }; - size_t region[] = { src.cols, src.rows, 1 }; + size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 }; cl_mem devData; if (!alias && !src.isContinuous()) @@ -4599,7 +4599,7 @@ struct Image2D::Impl devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err); CV_OclDbgAssert(err == CL_SUCCESS); - const size_t roi[3] = {src.cols * src.elemSize(), src.rows, 1}; + const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1}; CV_Assert(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin, roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL) == CL_SUCCESS); CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS); diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl index 8945ed4..9dfb5f2 100644 --- a/modules/core/src/opencl/arithm.cl +++ b/modules/core/src/opencl/arithm.cl @@ -66,9 +66,9 @@ #endif #ifdef INTEL_DEVICE -#pragma OPENCL FP_CONTRACT : on -#pragma OPENCL FP_FAST_FMAF : on -#pragma OPENCL FP_FAST_FMA : on +#pragma OPENCL FP_CONTRACT ON +#pragma OPENCL FP_FAST_FMAF ON +#pragma OPENCL FP_FAST_FMA ON #endif #if depth <= 5 @@ -165,7 +165,7 @@ #elif defined OP_ABSDIFF #if wdepth <= 4 #define PROCESS_ELEM \ - storedst(convertToDT(abs_diff(srcelem1, srcelem2))) + storedst(convertToDT(convertFromU(abs_diff(srcelem1, srcelem2)))) #else #define PROCESS_ELEM \ storedst(convertToDT(fabs(srcelem1 - srcelem2))) @@ -247,7 +247,7 @@ #if wdepth <= 4 #define PROCESS_ELEM storedst(convertToDT(mad24(srcelem1, 
alpha, mad24(srcelem2, beta, gamma)))) #else -#define PROCESS_ELEM storedst(convertToDT(fma(srcelem1, alpha, mad(srcelem2, beta, gamma)))) +#define PROCESS_ELEM storedst(convertToDT(fma(srcelem1, alpha, fma(srcelem2, beta, gamma)))) #endif #elif defined OP_MAG @@ -257,7 +257,7 @@ #define PROCESS_ELEM \ workT tmp = atan2(srcelem2, srcelem1); \ if (tmp < 0) \ - tmp += 6.283185307179586232f; \ + tmp += 2 * CV_PI; \ storedst(tmp) #elif defined OP_PHASE_DEGREES @@ -295,7 +295,7 @@ #define convertToWT1 #endif #define PROCESS_ELEM \ - storedst(srcelem1 CMP_OPERATOR srcelem2 ? (dstT)(255) : (dstT)(0))) + storedst(srcelem1 CMP_OPERATOR srcelem2 ? (dstT)(255) : (dstT)(0)) #elif defined OP_CONVERT_SCALE_ABS #undef EXTRA_PARAMS @@ -351,7 +351,7 @@ #define PROCESS_ELEM \ dstT x = srcelem1, y = srcelem2, cosval; \ FROM_DEGREE; \ - storedst2(sincos(y, &srcelem2) * x); \ + storedst2(sincos(y, &cosval) * x); \ storedst(cosval * x); #elif defined OP_PATCH_NANS diff --git a/modules/core/src/opencl/minmaxloc.cl b/modules/core/src/opencl/minmaxloc.cl index ca708a8..a51c5d9 100644 --- a/modules/core/src/opencl/minmaxloc.cl +++ b/modules/core/src/opencl/minmaxloc.cl @@ -40,8 +40,8 @@ #define INDEX_MAX UINT_MAX #if wdepth <= 4 -#define MIN_ABS(a) convertToDT(abs(a)) -#define MIN_ABS2(a, b) convertToDT(abs_diff(a, b)) +#define MIN_ABS(a) convertFromU(abs(a)) +#define MIN_ABS2(a, b) convertFromU(abs_diff(a, b)) #else #define MIN_ABS(a) fabs(a) #define MIN_ABS2(a, b) fabs(a - b) diff --git a/modules/core/src/opencl/reduce.cl b/modules/core/src/opencl/reduce.cl index 8c5193f..f16a742 100644 --- a/modules/core/src/opencl/reduce.cl +++ b/modules/core/src/opencl/reduce.cl @@ -109,8 +109,8 @@ #endif #if ddepth <= 4 -#define SUM_ABS(a) convertToDT(abs(a)) -#define SUM_ABS2(a, b) convertToDT(abs_diff(a, b)) +#define SUM_ABS(a) convertFromU(abs(a)) +#define SUM_ABS2(a, b) convertFromU(abs_diff(a, b)) #else #define SUM_ABS(a) fabs(a) #define SUM_ABS2(a, b) fabs(a - b) diff --git 
a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index eb13247..3dd0428 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -499,20 +499,21 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask wgs2_aligned >>= 1; static const char * const opMap[3] = { "OP_SUM", "OP_SUM_ABS", "OP_SUM_SQR" }; - char cvt[40]; + char cvt[2][40]; String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D dstT1=%s -D ddepth=%d -D cn=%d" - " -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d%s%s%s", + " -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d%s%s%s -D convertFromU=%s", ocl::typeToStr(CV_MAKE_TYPE(depth, mcn)), ocl::typeToStr(depth), ocl::typeToStr(dtype), ocl::typeToStr(CV_MAKE_TYPE(ddepth, mcn)), ocl::typeToStr(ddepth), ddepth, cn, - ocl::convertTypeStr(depth, ddepth, mcn, cvt), + ocl::convertTypeStr(depth, ddepth, mcn, cvt[0]), opMap[sum_op], (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", haveMask ? " -D HAVE_MASK" : "", _src.isContinuous() ? " -D HAVE_SRC_CONT" : "", haveMask && _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn, haveSrc2 ? " -D HAVE_SRC2" : "", calc2 ? " -D OP_CALC2" : "", - haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : ""); + haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", + depth <= CV_32S && ddepth == CV_32S ? 
ocl::convertTypeStr(CV_8U, ddepth, mcn, cvt[1]) : "noconvert"); ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, opts); if (k.empty()) @@ -1468,10 +1469,10 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* needMaxLoc = true; } - char cvt[40]; + char cvt[2][40]; String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s" " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s" - " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d", + " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d -D convertFromU=%s", depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs, ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", @@ -1480,9 +1481,11 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "", needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "", ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), - ocl::convertTypeStr(depth, ddepth, kercn, cvt), absValues ? " -D OP_ABS" : "", + ocl::convertTypeStr(depth, ddepth, kercn, cvt[0]), + absValues ? " -D OP_ABS" : "", haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "", - haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth); + haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth, + depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, kercn, cvt[1]) : "noconvert"); ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts); if (k.empty()) -- 2.7.4