From f138b613b7361a465223dbb8c78848b4b58829c5 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 11 Mar 2014 23:41:44 +0400 Subject: [PATCH] cv::compare --- modules/core/src/arithm.cpp | 23 +++++++++++++++-------- modules/core/src/convert.cpp | 2 +- modules/core/src/mathfuncs.cpp | 2 +- modules/core/src/matmul.cpp | 2 +- modules/core/src/opencl/arithm.cl | 3 +-- modules/imgproc/src/opencl/threshold.cl | 16 ++++++++-------- modules/imgproc/src/thresh.cpp | 12 ++++++------ modules/ts/src/ocl_test.cpp | 19 +++++++++++++++++++ 8 files changed, 52 insertions(+), 27 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 2702c21..436239c 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -2624,16 +2624,23 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), type2 = _src2.type(); - if (!doubleSupport && (depth == CV_64F || _src2.depth() == CV_64F)) + if ( (!doubleSupport && (depth == CV_64F || _src2.depth() == CV_64F)) || + !_src1.sameSize(_src2) || type != type2) return false; + int kercn = ocl::predictOptimalVectorWidth(_src1, _src2, _dst); const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" }; + char cvt[40]; + ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D BINARY_OP -D srcT1=%s -D workT=srcT1 -D cn=1" - " -D OP_CMP -D CMP_OPERATOR=%s%s", - ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), - operationMap[op], - doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + format("-D BINARY_OP -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d" + " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s%s -D srcT1_C1=%s" + " -D srcT2_C1=%s -D dstT_C1=%s", + ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), + ocl::typeToStr(CV_8UC(kercn)), kercn, + ocl::convertTypeStr(depth, CV_8U, kercn, cvt), + operationMap[op], doubleSupport ? " -D DOUBLE_SUPPORT" : "", + ocl::typeToStr(depth), ocl::typeToStr(depth), ocl::typeToStr(CV_8U))); if (k.empty()) return false; @@ -2647,9 +2654,9 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2), - ocl::KernelArg::WriteOnly(dst, cn)); + ocl::KernelArg::WriteOnly(dst, cn, kercn)); - size_t globalsize[2] = { dst.cols * cn, dst.rows }; + size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows }; return k.run(2, globalsize, NULL, false); } diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index af93cdb..0139f6a 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -1311,7 +1311,7 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth) static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta ) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), - kercn = cn > 4 || cn == 3 ? 1 : ocl::predictOptimalVectorWidth(_src, _dst); + kercn = ocl::predictOptimalVectorWidth(_src, _dst); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if (!doubleSupport && depth == CV_64F) diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 8cc61ab..095460c 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -63,7 +63,7 @@ static const char* oclop2str[] = { "OP_LOG", "OP_EXP", "OP_MAG", "OP_PHASE_DEGRE static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, int oclop) { int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - int kercn = cn == 3 || cn > 4 || oclop == OCL_OP_PHASE_DEGREES || + int kercn = oclop == OCL_OP_PHASE_DEGREES || oclop == OCL_OP_PHASE_RADIANS ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst); bool double_support = ocl::Device::getDefault().doubleFPConfig() > 0; diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index 409c30f..daad86a 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -2158,7 +2158,7 @@ typedef void (*ScaleAddFunc)(const uchar* src1, const uchar* src2, uchar* dst, i static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst, int type ) { int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), wdepth = std::max(depth, CV_32F), - kercn = cn == 3 || cn > 4 ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst); + kercn = ocl::predictOptimalVectorWidth(_src1, _src2, _dst); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; Size size = _src1.size(); diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl index f7278e1..865b433 100644 --- a/modules/core/src/opencl/arithm.cl +++ b/modules/core/src/opencl/arithm.cl @@ -279,10 +279,9 @@ storedst(v > (dstT)(0) ? log(v) : log(-v)) #elif defined OP_CMP -#define dstT uchar #define srcT2 srcT1 #define convertToWT1 -#define PROCESS_ELEM storedst(convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0)) +#define PROCESS_ELEM storedst(convertToDT(srcelem1 CMP_OPERATOR srcelem2 ? (dstT)(255) : (dstT)(0))) #elif defined OP_CONVERT_SCALE_ABS #undef EXTRA_PARAMS diff --git a/modules/imgproc/src/opencl/threshold.cl b/modules/imgproc/src/opencl/threshold.cl index f5b6fbb..5049426 100644 --- a/modules/imgproc/src/opencl/threshold.cl +++ b/modules/imgproc/src/opencl/threshold.cl @@ -53,29 +53,29 @@ __kernel void threshold(__global const uchar * srcptr, int src_step, int src_offset, __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols, - T thresh, T max_val) + T1 thresh, T1 max_val) { int gx = get_global_id(0); int gy = get_global_id(1); if (gx < cols && gy < rows) { - int src_index = mad24(gy, src_step, src_offset + gx * (int)sizeof(T)); - int dst_index = mad24(gy, dst_step, dst_offset + gx * (int)sizeof(T)); + int src_index = mad24(gy, src_step, mad24(gx, (int)sizeof(T), src_offset)); + int dst_index = mad24(gy, dst_step, mad24(gx, (int)sizeof(T), dst_offset)); T sdata = *(__global const T *)(srcptr + src_index); __global T * dst = (__global T *)(dstptr + dst_index); #ifdef THRESH_BINARY - dst[0] = sdata > thresh ? max_val : (T)(0); + dst[0] = sdata > (T)(thresh) ? (T)(max_val) : (T)(0); #elif defined THRESH_BINARY_INV - dst[0] = sdata > thresh ? (T)(0) : max_val; + dst[0] = sdata > (T)(thresh) ? (T)(0) : (T)(max_val); #elif defined THRESH_TRUNC - dst[0] = sdata > thresh ? thresh : sdata; + dst[0] = sdata > (T)(thresh) ? (T)(thresh) : sdata; #elif defined THRESH_TOZERO - dst[0] = sdata > thresh ? sdata : (T)(0); + dst[0] = sdata > (T)(thresh) ? sdata : (T)(0); #elif defined THRESH_TOZERO_INV - dst[0] = sdata > thresh ? (T)(0) : sdata; + dst[0] = sdata > (T)(thresh) ? (T)(0) : sdata; #endif } } diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp index cb280ec..7fd0b23 100644 --- a/modules/imgproc/src/thresh.cpp +++ b/modules/imgproc/src/thresh.cpp @@ -711,8 +711,7 @@ private: static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type ) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), - kercn = cn <= 4 && cn != 3 ? cn : ocl::predictOptimalVectorWidth(_src, _dst), - ktype = CV_MAKE_TYPE(depth, kercn); + kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC || @@ -723,8 +722,9 @@ static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, d const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC", "THRESH_TOZERO", "THRESH_TOZERO_INV" }; ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc, - format("-D %s -D T=%s%s", thresholdMap[thresh_type], - ocl::typeToStr(ktype), doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + format("-D %s -D T=%s -D T1=%s%s", thresholdMap[thresh_type], + ocl::typeToStr(ktype), ocl::typeToStr(depth), + doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; @@ -736,8 +736,8 @@ static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, d thresh = cvFloor(thresh); k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn), - ocl::KernelArg::Constant(Mat(1, 1, ktype, Scalar::all(thresh))), - ocl::KernelArg::Constant(Mat(1, 1, ktype, Scalar::all(maxval)))); + ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))), + ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval)))); size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows }; return k.run(2, globalsize, NULL, false); diff --git a/modules/ts/src/ocl_test.cpp b/modules/ts/src/ocl_test.cpp index 7da04f6..caf5bf4 100644 --- a/modules/ts/src/ocl_test.cpp +++ b/modules/ts/src/ocl_test.cpp @@ -174,6 +174,25 @@ void dumpOpenCLDevice() const char* haveAmdFftStr = haveAmdFft() ? "Yes" : "No"; DUMP_MESSAGE_STDOUT(" Has AMD Fft = "<< haveAmdFftStr); DUMP_PROPERTY_XML("cv_ocl_current_AmdFft", haveAmdFft()); + + + DUMP_MESSAGE_STDOUT(" Preferred vector width char = "<< device.preferredVectorWidthChar()); + DUMP_PROPERTY_XML("cv_ocl_current_preferredVectorWidthChar", device.preferredVectorWidthChar()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width short = "<< device.preferredVectorWidthShort()); + DUMP_PROPERTY_XML("cv_ocl_current_preferredVectorWidthShort", device.preferredVectorWidthShort()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width int = "<< device.preferredVectorWidthInt()); + DUMP_PROPERTY_XML("cv_ocl_current_preferredVectorWidthInt", device.preferredVectorWidthInt()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width long = "<< device.preferredVectorWidthLong()); + DUMP_PROPERTY_XML("cv_ocl_current_preferredVectorWidthLong", device.preferredVectorWidthLong()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width float = "<< device.preferredVectorWidthFloat()); + DUMP_PROPERTY_XML("cv_ocl_current_preferredVectorWidthFloat", device.preferredVectorWidthFloat()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width double = "<< device.preferredVectorWidthDouble()); + DUMP_PROPERTY_XML("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble()); } catch (...) { -- 2.7.4