From eb8b4c58c7c717a3c2f03c22e186267c4e44f265 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 20 Mar 2014 00:28:21 +0400 Subject: [PATCH] fixed bug in cv::ocl::predictOptimalVectorWidth --- modules/core/src/arithm.cpp | 21 +++++++++------------ modules/core/src/mathfuncs.cpp | 9 +++------ modules/core/src/ocl.cpp | 7 ++++--- modules/core/test/ocl/test_arithm.cpp | 2 +- 4 files changed, 17 insertions(+), 22 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 5672c02..f59eefd 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -1299,7 +1299,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1); bool haveMask = !_mask.empty(); - if( ((haveMask || haveScalar) && cn > 4) ) + if ( (haveMask || haveScalar) && cn > 4 ) return false; int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32S, CV_MAT_DEPTH(wtype)); @@ -1320,14 +1320,11 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d", (haveMask ? "MASK_" : ""), (haveScalar ? 
"UNARY_OP" : "BINARY_OP"), oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)), - ocl::typeToStr(CV_MAKETYPE(depth1, 1)), - ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), - ocl::typeToStr(CV_MAKETYPE(depth2, 1)), - ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), - ocl::typeToStr(CV_MAKETYPE(ddepth, 1)), - ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)), + ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), + ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), + ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)), ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)), - ocl::typeToStr(CV_MAKETYPE(wdepth, 1)), wdepth, + ocl::typeToStr(wdepth), wdepth, ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]), ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]), ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]), @@ -1347,7 +1344,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, } ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts); - if( k.empty() ) + if (k.empty()) return false; UMat src1 = _src1.getUMat(), src2; @@ -1388,12 +1385,12 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, if( !haveMask ) { - if(n == 0) + if (n == 0) k.args(src1arg, src2arg, dstarg); - else if(n == 1) + else if (n == 1) k.args(src1arg, src2arg, dstarg, ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz)); - else if(n == 3) + else if (n == 3) k.args(src1arg, src2arg, dstarg, ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz), ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz), diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 095460c..16df02c 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -2041,7 +2041,7 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst, const char * const op = issqrt ? "OP_SQRT" : is_ipower ? 
"OP_POWN" : "OP_POW"; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D dstT=%s -D %s -D UNARY_OP%s", ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), + format("-D dstT=%s -D %s -D UNARY_OP%s", ocl::typeToStr(depth), op, doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; @@ -2081,7 +2081,7 @@ void pow( InputArray _src, double power, OutputArray _dst ) { if( ipower < 0 ) { - divide( 1., _src, _dst ); + divide( Scalar::all(1), _src, _dst ); if( ipower == -1 ) return; ipower = -ipower; @@ -2115,10 +2115,7 @@ void pow( InputArray _src, double power, OutputArray _dst ) Mat src, dst; if (same) - { - dst = _dst.getMat(); - src = dst; - } + src = dst = _dst.getMat(); else { src = _src.getMat(); diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 7c4f8de..5a522d4 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -4347,7 +4347,7 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, InputArray src4, InputArray src5, InputArray src6, InputArray src7, InputArray src8, InputArray src9) { - int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(depth); Size ssize = src1.size(); const ocl::Device & d = ocl::Device::getDefault(); @@ -4371,7 +4371,8 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, PROCESS_SRC(src9); size_t size = offsets.size(); - std::vector<int> dividers(size, width); + int wsz = width * esz; + std::vector<int> dividers(size, wsz); for (size_t i = 0; i < size; ++i) while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0) @@ -4379,7 +4380,7 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, // default strategy for (size_t i = 0; i < size; ++i) - if (dividers[i] != width) + if (dividers[i] != wsz) { width = 1; break; diff --git 
a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 8618746..d2b26e1 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -839,7 +839,7 @@ OCL_TEST_P(Pow, Mat) OCL_OFF(cv::pow(src1_roi, pows[k], dst1_roi)); OCL_ON(cv::pow(usrc1_roi, pows[k], udst1_roi)); - OCL_EXPECT_MATS_NEAR_RELATIVE(dst1, 1e-6); + OCL_EXPECT_MATS_NEAR_RELATIVE(dst1, 1e-5); } } -- 2.7.4