#define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n)))
+static bool ocl_morph3x3_8UC1( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor,
+ int op, int actual_op = -1, InputArray _extraMat = noArray())
+{
+ int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+ Size ksize = _kernel.size();
+
+ Mat kernel8u;
+ String processing;
+
+ bool haveExtraMat = !_extraMat.empty();
+ CV_Assert(actual_op <= 3 || haveExtraMat);
+
+ _kernel.getMat().convertTo(kernel8u, CV_8U);
+ for (int y = 0; y < kernel8u.rows; ++y)
+ for (int x = 0; x < kernel8u.cols; ++x)
+ if (kernel8u.at<uchar>(y, x) != 0)
+ processing += format("PROCESS(%d,%d)", y, x);
+
+ if (anchor.x < 0)
+ anchor.x = ksize.width / 2;
+ if (anchor.y < 0)
+ anchor.y = ksize.height / 2;
+
+ if (actual_op < 0)
+ actual_op = op;
+
+ if (type != CV_8UC1 ||
+ !((_src.offset() == 0) && (_src.step() % 4 == 0)) ||
+ !((_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) ||
+ !(anchor.x == 1 && anchor.y == 1) ||
+ !(ksize.width == 3 && ksize.height == 3))
+ return false;
+
+ Size size = _src.size();
+ size_t globalsize[2] = { 0, 0 };
+ size_t localsize[2] = { 0, 0 };
+
+ globalsize[0] = size.width / 16;
+ globalsize[1] = size.height / 2;
+
+ static const char * const op2str[] = { "OP_ERODE", "OP_DILATE", NULL, NULL, "OP_GRADIENT", "OP_TOPHAT", "OP_BLACKHAT" };
+ String opts = format("-D PROCESS_ELEM_=%s -D %s%s", processing.c_str(), op2str[op],
+ actual_op == op ? "" : cv::format(" -D %s", op2str[actual_op]).c_str());
+
+ ocl::Kernel k;
+ k.create("morph3x3_8UC1_cols16_rows2", cv::ocl::imgproc::morph3x3_oclsrc, opts);
+
+ if (k.empty())
+ return false;
+
+ UMat src = _src.getUMat();
+ _dst.create(size, CV_MAKETYPE(depth, cn));
+ if (!(_dst.offset() == 0 && _dst.step() % 4 == 0))
+ return false;
+ UMat dst = _dst.getUMat();
+ UMat extraMat = _extraMat.getUMat();
+
+ int idxArg = k.set(0, ocl::KernelArg::PtrReadOnly(src));
+ idxArg = k.set(idxArg, (int)src.step);
+ idxArg = k.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
+ idxArg = k.set(idxArg, (int)dst.step);
+ idxArg = k.set(idxArg, (int)dst.rows);
+ idxArg = k.set(idxArg, (int)dst.cols);
+
+ if (haveExtraMat)
+ {
+ idxArg = k.set(idxArg, ocl::KernelArg::ReadOnlyNoSize(extraMat));
+ }
+
+ return k.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false);
+}
+
static bool ocl_morphSmall( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, int borderType,
int op, int actual_op = -1, InputArray _extraMat = noArray())
{
#endif
)
{
+ if (ocl_morph3x3_8UC1(_src, _dst, kernel, anchor, op, actual_op, _extraMat))
+ return true;
+
if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat))
return true;
}
--- /dev/null
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifdef OP_ERODE
+#define OP(m1, m2) min(m1, m2)
+#define VAL UCHAR_MAX
+#endif
+
+#ifdef OP_DILATE
+#define OP(m1, m2) max(m1, m2)
+#define VAL 0
+#endif
+
+#if defined OP_GRADIENT || defined OP_TOPHAT || defined OP_BLACKHAT
+#define EXTRA_PARAMS , __global const uchar * matptr, int mat_step, int mat_offset
+#else
+#define EXTRA_PARAMS
+#endif
+
+#define PROCESS(_y, _x) \
+ line_out[0] = OP(line_out[0], arr[_x + 3 * _y]); \
+ line_out[1] = OP(line_out[1], arr[_x + 3 * (_y + 1)]);
+
+#define PROCESS_ELEM \
+ line_out[0] = (uchar16)VAL; \
+ line_out[1] = (uchar16)VAL; \
+ PROCESS_ELEM_
+
+__kernel void morph3x3_8UC1_cols16_rows2(__global const uint* src, int src_step,
+ __global uint* dst, int dst_step,
+ int rows, int cols
+ EXTRA_PARAMS)
+{
+ int block_x = get_global_id(0);
+ int y = get_global_id(1) * 2;
+ int ssx = 1, dsx = 1;
+
+ if ((block_x * 16) >= cols || y >= rows) return;
+
+ uchar a; uchar16 b; uchar c;
+ uchar d; uchar16 e; uchar f;
+ uchar g; uchar16 h; uchar i;
+ uchar j; uchar16 k; uchar l;
+
+ uchar16 line[4];
+ uchar16 line_out[2];
+
+ int src_index = block_x * 4 * ssx + (y - 1) * (src_step / 4);
+ line[0] = (y == 0) ? (uchar16)VAL: as_uchar16(vload4(0, src + src_index));
+ line[1] = as_uchar16(vload4(0, src + src_index + (src_step / 4)));
+ line[2] = as_uchar16(vload4(0, src + src_index + 2 * (src_step / 4)));
+ line[3] = (y == (rows - 2)) ? (uchar16)VAL: as_uchar16(vload4(0, src + src_index + 3 * (src_step / 4)));
+
+ __global uchar *src_p = (__global uchar *)src;
+ bool line_end = ((block_x + 1) * 16 == cols);
+
+ src_index = block_x * 16 * ssx + (y - 1) * src_step;
+
+ a = (block_x == 0 || y == 0) ? VAL : src_p[src_index - 1];
+ b = line[0];
+ c = (line_end || y == 0) ? VAL : src_p[src_index + 16];
+
+ d = (block_x == 0) ? VAL : src_p[src_index + src_step - 1];
+ e = line[1];
+ f = line_end ? VAL : src_p[src_index + src_step + 16];
+
+ g = (block_x == 0) ? VAL : src_p[src_index + 2 * src_step - 1];
+ h = line[2];
+ i = line_end ? VAL : src_p[src_index + 2 * src_step + 16];
+
+ j = (block_x == 0 || y == (rows - 2)) ? VAL : src_p[src_index + 3 * src_step - 1];
+ k = line[3];
+ l = (line_end || y == (rows - 2)) ? VAL : src_p[src_index + 3 * src_step + 16];
+
+ uchar16 arr[12];
+ arr[0] = (uchar16)(a, b.s01234567, b.s89ab, b.scde);
+ arr[1] = b;
+ arr[2] = (uchar16)(b.s12345678, b.s9abc, b.sdef, c);
+ arr[3] = (uchar16)(d, e.s01234567, e.s89ab, e.scde);
+ arr[4] = e;
+ arr[5] = (uchar16)(e.s12345678, e.s9abc, e.sdef, f);
+ arr[6] = (uchar16)(g, h.s01234567, h.s89ab, h.scde);
+ arr[7] = h;
+ arr[8] = (uchar16)(h.s12345678, h.s9abc, h.sdef, i);
+ arr[9] = (uchar16)(j, k.s01234567, k.s89ab, k.scde);
+ arr[10] = k;
+ arr[11] = (uchar16)(k.s12345678, k.s9abc, k.sdef, l);
+
+ PROCESS_ELEM;
+
+ int dst_index = block_x * 4 * dsx + y * (dst_step / 4);
+
+#if defined OP_GRADIENT || defined OP_TOPHAT || defined OP_BLACKHAT
+ int mat_index = y * mat_step + block_x * 16 * ssx + mat_offset;
+ uchar16 val0 = vload16(0, matptr + mat_index);
+ uchar16 val1 = vload16(0, matptr + mat_index + mat_step);
+
+#ifdef OP_GRADIENT
+ line_out[0] = convert_uchar16_sat(convert_int16(line_out[0]) - convert_int16(val0));
+ line_out[1] = convert_uchar16_sat(convert_int16(line_out[1]) - convert_int16(val1));
+ vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
+ vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
+#elif defined OP_TOPHAT
+ line_out[0] = convert_uchar16_sat(convert_int16(val0) - convert_int16(line_out[0]));
+ line_out[1] = convert_uchar16_sat(convert_int16(val1) - convert_int16(line_out[1]));
+ vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
+ vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
+#elif defined OP_BLACKHAT
+ line_out[0] = convert_uchar16_sat(convert_int16(line_out[0]) - convert_int16(val0));
+ line_out[1] = convert_uchar16_sat(convert_int16(line_out[1]) - convert_int16(val1));
+ vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
+ vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
+#endif
+#else
+ vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
+ vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
+#endif
+}
}
}
+PARAM_TEST_CASE(MorphFilter3x3_cols16_rows2_Base, MatType,
+ int, // kernel size
+ Size, // dx, dy
+ BorderType, // border type
+ double, // optional parameter
+ bool, // roi or not
+ int) // width multiplier
+{
+ int type, borderType, ksize;
+ Size size;
+ double param;
+ bool useRoi;
+ int widthMultiple;
+
+ TEST_DECLARE_INPUT_PARAMETER(src);
+ TEST_DECLARE_OUTPUT_PARAMETER(dst);
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ ksize = GET_PARAM(1);
+ size = GET_PARAM(2);
+ borderType = GET_PARAM(3);
+ param = GET_PARAM(4);
+ useRoi = GET_PARAM(5);
+ widthMultiple = GET_PARAM(6);
+ }
+
+ void random_roi()
+ {
+ size = Size(3, 3);
+
+ Size roiSize = randomSize(size.width, MAX_VALUE, size.height, MAX_VALUE);
+ roiSize.width = std::max(size.width + 13, roiSize.width & (~0xf));
+ roiSize.height = std::max(size.height + 1, roiSize.height & (~0x1));
+
+ Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
+ randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
+
+ Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
+ randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -60, 70);
+
+ UMAT_UPLOAD_INPUT_PARAMETER(src);
+ UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
+ }
+
+ void Near()
+ {
+ Near(1, false);
+ }
+
+ void Near(double threshold, bool relative)
+ {
+ if (relative)
+ OCL_EXPECT_MATS_NEAR_RELATIVE(dst, threshold);
+ else
+ OCL_EXPECT_MATS_NEAR(dst, threshold);
+ }
+};
+
+typedef MorphFilter3x3_cols16_rows2_Base MorphFilter3x3_cols16_rows2;
+
+OCL_TEST_P(MorphFilter3x3_cols16_rows2, Mat)
+{
+ Size kernelSize(ksize, ksize);
+ int iterations = (int)param;
+
+ for (int j = 0; j < test_loop_times; j++)
+ {
+ random_roi();
+ Mat kernel = ksize==0 ? Mat() : randomMat(kernelSize, CV_8UC1, 0, 3);
+
+ OCL_OFF(cv::dilate(src_roi, dst_roi, kernel, Point(-1, -1), iterations) );
+ OCL_ON(cv::dilate(usrc_roi, udst_roi, kernel, Point(-1, -1), iterations) );
+
+ Near();
+ }
+}
+
/////////////////////////////////////////////////////////////////////////////////////////////////
// MorphologyEx
IMPLEMENT_PARAM_CLASS(MorphOp, int)
Bool(),
Values(1))); // not used
+OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphFilter3x3_cols16_rows2, Combine(
+ Values((MatType)CV_8UC1),
+ Values(0, 3), // kernel size, 0 means kernel = Mat()
+ Values(Size(0, 0)), // not used
+ Values((BorderType)BORDER_CONSTANT),
+ Values(1.0, 2.0, 3.0),
+ Bool(),
+ Values(1))); // not used
+
OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(3, 5, 7), // kernel size