return k.run(2, globalsize, NULL, false);
}
+// OpenCL fast path for a 3x3 filter (used for the Laplacian) on CV_8UC1 -> CV_8U data.
+//
+// The "laplacian3_8UC1_cols16_rows2" kernel is launched with one work-item per
+// 16-column x 2-row tile, so the path is taken only when
+//   - the default device is an Intel device,
+//   - the source is CV_8UC1 and the (already resolved) ddepth is CV_8U,
+//   - the border mode is one the kernel implements,
+//   - source/destination start at offset 0 with a step that is a multiple of 4,
+//   - cols is a multiple of 16 and rows is a multiple of 2.
+//
+// _src       source image
+// _dst       destination; (re)created here with the source size
+// ddepth     destination depth, must be CV_8U (negative values are not resolved here)
+// _kernel    filter coefficients, flattened and baked into the program as KERNEL_MATRIX
+// delta      value forwarded to the kernel as a float
+// borderType border mode, used to pick the -D BORDER_* build option
+//
+// Returns true if the kernel was enqueued successfully; false means "not handled"
+// and the caller is expected to fall back to another implementation.
+static bool ocl_Laplacian3_8UC1(InputArray _src, OutputArray _dst, int ddepth,
+                                InputArray _kernel, double delta, int borderType)
+{
+    const ocl::Device & dev = ocl::Device::getDefault();
+    const int type = _src.type(), cn = CV_MAT_CN(type);
+
+    if ( !(dev.isIntel() && (type == CV_8UC1) && (ddepth == CV_8U) &&
+         (borderType != BORDER_WRAP) &&
+         (_src.offset() == 0) && (_src.step() % 4 == 0) &&
+         (_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) )
+        return false;
+
+    // Build-option name for each border mode, indexed by the borderType value.
+    // A null entry marks a mode the kernel does not implement (presumably the
+    // BORDER_WRAP slot, which is also rejected above).
+    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
+    const int borderMapSize = static_cast<int>(sizeof(borderMap) / sizeof(borderMap[0]));
+
+    // Any value outside the table (for example one carrying extra flag bits)
+    // would index past the array and hand a garbage pointer to "%s" below,
+    // so treat it as "not handled" instead.
+    if (borderType < 0 || borderType >= borderMapSize || !borderMap[borderType])
+        return false;
+
+    Mat kernel = _kernel.getMat().reshape(1, 1);
+
+    const Size size = _src.size();
+
+    // One work-item per 16x2 tile.
+    size_t globalsize[2] = { 0, 0 };
+    globalsize[0] = size.width / 16;
+    globalsize[1] = size.height / 2;
+
+    char build_opts[1024];
+    sprintf(build_opts, "-D %s %s", borderMap[borderType],
+            ocl::kernelToStr(kernel, CV_32F, "KERNEL_MATRIX").c_str());
+
+    ocl::Kernel k("laplacian3_8UC1_cols16_rows2", cv::ocl::imgproc::laplacian3_oclsrc, build_opts);
+    if (k.empty())
+        return false;
+
+    UMat src = _src.getUMat();
+    _dst.create(size, CV_MAKETYPE(ddepth, cn));
+    // The kernel addresses the destination as 32-bit words as well.
+    if (!(_dst.offset() == 0 && _dst.step() % 4 == 0))
+        return false;
+    UMat dst = _dst.getUMat();
+
+    int idxArg = k.set(0, ocl::KernelArg::PtrReadOnly(src));
+    idxArg = k.set(idxArg, (int)src.step);
+    idxArg = k.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
+    idxArg = k.set(idxArg, (int)dst.step);
+    idxArg = k.set(idxArg, (int)dst.rows);
+    idxArg = k.set(idxArg, (int)dst.cols);
+    idxArg = k.set(idxArg, static_cast<float>(delta));
+
+    // No explicit local size is requested.
+    return k.run(2, globalsize, NULL, false);
}
+}
#endif
#if defined(HAVE_IPP)
ddepth = sdepth;
_dst.create( _src.size(), CV_MAKETYPE(ddepth, cn) );
+ if( ksize == 1 || ksize == 3 )
+ {
+ float K[2][9] =
+ {
+ { 0, 1, 0, 1, -4, 1, 0, 1, 0 },
+ { 2, 0, 2, 0, -8, 0, 2, 0, 2 }
+ };
+
+ Mat kernel(3, 3, CV_32F, K[ksize == 3]);
+ if( scale != 1 )
+ kernel *= scale;
+
+ CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
+ ocl_Laplacian3_8UC1(_src, _dst, ddepth, kernel, delta, borderType));
+ }
+
CV_IPP_RUN((ksize == 3 || ksize == 5) && ((borderType & BORDER_ISOLATED) != 0 || !_src.isSubmatrix()) &&
((stype == CV_8UC1 && ddepth == CV_16S) || (ddepth == CV_32F && stype == CV_32FC1)) && (!cv::ocl::useOpenCL()),
ipp_Laplacian(_src, _dst, ddepth, ksize, scale, delta, borderType));
Mat kernel(3, 3, CV_32F, K[ksize == 3]);
if( scale != 1 )
kernel *= scale;
+
filter2D( _src, _dst, ddepth, kernel, Point(-1, -1), delta, borderType );
}
else
--- /dev/null
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Exactly one border mode must be selected through the build options; without it
+// the border rows and neighbour bytes below would be read uninitialised.
+#if !defined(BORDER_CONSTANT) && !defined(BORDER_REFLECT_101) && \
+    !defined(BORDER_REPLICATE) && !defined(BORDER_REFLECT)
+#error "laplacian3_8UC1_cols16_rows2: no supported BORDER_* mode defined"
+#endif
+
+// KERNEL_MATRIX is supplied as a build option; it presumably expands to a run of
+// DIG(v) items, each of which becomes "v," in the initialiser below.
+#define DIG(a) a,
+__constant float kx[] = { KERNEL_MATRIX };
+
+// Term x (0..8) of the 3x3 sum: the shifted 16-pixel vector arr[base + x] times
+// coefficient kx[x]. base selects the first of the three source rows involved.
+#define OP(base, x) (convert_float16(arr[base + x]) * kx[x])
+
+// Applies the 3x3 filter in kx to an 8-bit single-channel image.
+// Each work-item produces a tile of 16 columns x 2 rows:
+//   get_global_id(0) -> 16-pixel column block, get_global_id(1) -> pair of rows.
+//
+// src, dst            images addressed as 32-bit words (4 pixels per word)
+// src_step, dst_step  row pitch in bytes
+// rows, cols          image size in pixels
+// delta               added to every filtered value before the saturating conversion
+__kernel void laplacian3_8UC1_cols16_rows2(__global const uint* src, int src_step,
+                                           __global uint* dst, int dst_step,
+                                           int rows, int cols, float delta)
+{
+    int block_x = get_global_id(0);
+    int y = get_global_id(1) * 2;
+
+    if ((block_x * 16) >= cols || y >= rows) return;
+
+    // line[0..3] hold source rows y-1, y, y+1, y+2 for this column block.
+    uint4 line[4];
+    uint4 line_out[2];
+    // Per source row: left neighbour byte, the 16 pixels, right neighbour byte.
+    uchar a; uchar16 b; uchar c;
+    uchar d; uchar16 e; uchar f;
+    uchar g; uchar16 h; uchar i;
+    uchar j; uchar16 k; uchar l;
+
+    // Word index of this block in row y-1.
+    int src_index = block_x * 4 + (y - 1) * (src_step / 4);
+    line[1] = vload4(0, src + src_index + (src_step / 4));
+    line[2] = vload4(0, src + src_index + 2 * (src_step / 4));
+
+    // Rows y-1 and y+2 fall outside the image for the first / last row pair.
+#ifdef BORDER_CONSTANT
+    line[0] = (y == 0) ? (uint4)0 : vload4(0, src + src_index);
+    line[3] = (y == (rows - 2)) ? (uint4)0 : vload4(0, src + src_index + 3 * (src_step / 4));
+#elif defined BORDER_REFLECT_101
+    line[0] = (y == 0) ? line[2] : vload4(0, src + src_index);
+    line[3] = (y == (rows - 2)) ? line[1] : vload4(0, src + src_index + 3 * (src_step / 4));
+#elif defined (BORDER_REPLICATE) || defined(BORDER_REFLECT)
+    line[0] = (y == 0) ? line[1] : vload4(0, src + src_index);
+    line[3] = (y == (rows - 2)) ? line[2] : vload4(0, src + src_index + 3 * (src_step / 4));
+#endif
+
+    __global uchar *src_p = (__global uchar *)src;
+
+    // Byte index of this block in row y-1.
+    src_index = block_x * 16 + (y - 1) * src_step;
+    bool line_end = ((block_x + 1) * 16 == cols);
+
+    b = as_uchar16(line[0]);
+    e = as_uchar16(line[1]);
+    h = as_uchar16(line[2]);
+    k = as_uchar16(line[3]);
+
+    // Fetch the single byte to the left (offset -1) and to the right (offset +16)
+    // of the block in each of the four rows, substituting the border value at the
+    // image edges.
+#ifdef BORDER_CONSTANT
+    a = (block_x == 0 || y == 0) ? 0 : src_p[src_index - 1];
+    c = (line_end || y == 0) ? 0 : src_p[src_index + 16];
+
+    d = (block_x == 0) ? 0 : src_p[src_index + src_step - 1];
+    f = line_end ? 0 : src_p[src_index + src_step + 16];
+
+    g = (block_x == 0) ? 0 : src_p[src_index + 2 * src_step - 1];
+    i = line_end ? 0 : src_p[src_index + 2 * src_step + 16];
+
+    j = (block_x == 0 || y == (rows - 2)) ? 0 : src_p[src_index + 3 * src_step - 1];
+    l = (line_end || y == (rows - 2))? 0 : src_p[src_index + 3 * src_step + 16];
+
+#elif defined BORDER_REFLECT_101
+    int offset;
+    // Row above the image mirrors to row y+1.
+    offset = (y == 0) ? (2 * src_step) : 0;
+
+    a = (block_x == 0) ? src_p[src_index + offset + 1] : src_p[src_index + offset - 1];
+    c = line_end ? src_p[src_index + offset + 14] : src_p[src_index + offset + 16];
+
+    d = (block_x == 0) ? src_p[src_index + src_step + 1] : src_p[src_index + src_step - 1];
+    f = line_end ? src_p[src_index + src_step + 14] : src_p[src_index + src_step + 16];
+
+    g = (block_x == 0) ? src_p[src_index + 2 * src_step + 1] : src_p[src_index + 2 * src_step - 1];
+    i = line_end ? src_p[src_index + 2 * src_step + 14] : src_p[src_index + 2 * src_step + 16];
+
+    // Row below the image mirrors to row y.
+    offset = (y == (rows - 2)) ? (1 * src_step) : (3 * src_step);
+
+    j = (block_x == 0) ? src_p[src_index + offset + 1] : src_p[src_index + offset - 1];
+    l = line_end ? src_p[src_index + offset + 14] : src_p[src_index + offset + 16];
+
+#elif defined (BORDER_REPLICATE) || defined(BORDER_REFLECT)
+    int offset;
+    // Row above the image repeats row y.
+    offset = (y == 0) ? (1 * src_step) : 0;
+
+    a = (block_x == 0) ? src_p[src_index + offset] : src_p[src_index + offset - 1];
+    c = line_end ? src_p[src_index + offset + 15] : src_p[src_index + offset + 16];
+
+    d = (block_x == 0) ? src_p[src_index + src_step] : src_p[src_index + src_step - 1];
+    f = line_end ? src_p[src_index + src_step + 15] : src_p[src_index + src_step + 16];
+
+    g = (block_x == 0) ? src_p[src_index + 2 * src_step] : src_p[src_index + 2 * src_step - 1];
+    i = line_end ? src_p[src_index + 2 * src_step + 15] : src_p[src_index + 2 * src_step + 16];
+
+    // Row below the image repeats row y+1.
+    offset = (y == (rows - 2)) ? (2 * src_step) : (3 * src_step);
+
+    j = (block_x == 0) ? src_p[src_index + offset] : src_p[src_index + offset - 1];
+    l = line_end ? src_p[src_index + offset + 15] : src_p[src_index + offset + 16];
+
+#endif
+
+    // For each of the four rows: left-shifted, centred and right-shifted copies,
+    // i.e. arr[3 * r + 0 / 1 / 2] give the x-1 / x / x+1 neighbours of row r.
+    uchar16 arr[12];
+    float16 sum[2];
+
+    arr[0] = (uchar16)(a, b.s0123, b.s456789ab, b.scde);
+    arr[1] = b;
+    arr[2] = (uchar16)(b.s123, b.s4567, b.s89abcdef, c);
+    arr[3] = (uchar16)(d, e.s0123, e.s456789ab, e.scde);
+    arr[4] = e;
+    arr[5] = (uchar16)(e.s123, e.s4567, e.s89abcdef, f);
+    arr[6] = (uchar16)(g, h.s0123, h.s456789ab, h.scde);
+    arr[7] = h;
+    arr[8] = (uchar16)(h.s123, h.s4567, h.s89abcdef, i);
+    arr[9] = (uchar16)(j, k.s0123, k.s456789ab, k.scde);
+    arr[10] = k;
+    arr[11] = (uchar16)(k.s123, k.s4567, k.s89abcdef, l);
+
+    // Output row y uses source rows y-1..y+1 (arr[0..8]).
+    sum[0] = OP(0, 0) + OP(0, 1) + OP(0, 2) +
+             OP(0, 3) + OP(0, 4) + OP(0, 5) +
+             OP(0, 6) + OP(0, 7) + OP(0, 8);
+
+    // Output row y+1 uses source rows y..y+2 (arr[3..11]).
+    sum[1] = OP(3, 0) + OP(3, 1) + OP(3, 2) +
+             OP(3, 3) + OP(3, 4) + OP(3, 5) +
+             OP(3, 6) + OP(3, 7) + OP(3, 8);
+
+    line_out[0] = as_uint4(convert_uchar16_sat_rte(sum[0] + delta));
+    line_out[1] = as_uint4(convert_uchar16_sat_rte(sum[1] + delta));
+
+    // Word index of this block in destination row y.
+    int dst_index = block_x * 4 + y * (dst_step / 4);
+    vstore4(line_out[0], 0, dst + dst_index);
+    vstore4(line_out[1], 0, dst + dst_index + (dst_step / 4));
+}
}
}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// Sobel
-
-typedef FilterTestBase SobelTest;
-
-OCL_TEST_P(SobelTest, Mat)
-{
- int dx = size.width, dy = size.height;
- double scale = param;
-
- for (int j = 0; j < test_loop_times; j++)
- {
- random_roi();
-
- OCL_OFF(cv::Sobel(src_roi, dst_roi, -1, dx, dy, ksize, scale, /* delta */0, borderType));
- OCL_ON(cv::Sobel(usrc_roi, udst_roi, -1, dx, dy, ksize, scale, /* delta */0, borderType));
-
- Near();
- }
-}
-
PARAM_TEST_CASE(Deriv3x3_cols16_rows2_Base, MatType,
int, // kernel size
Size, // dx, dy
}
};
+// Laplacian tests reuse the Deriv3x3_cols16_rows2_Base fixture unchanged.
+typedef Deriv3x3_cols16_rows2_Base Laplacian3_cols16_rows2;
+
+// Runs cv::Laplacian on the Mat ROI (inside OCL_OFF) and on the UMat ROI
+// (inside OCL_ON) with identical arguments, then compares the two results
+// with Near(). Repeated test_loop_times times on fresh random ROIs.
+OCL_TEST_P(Laplacian3_cols16_rows2, Accuracy)
+{
+    const double kernelScale = param;
+
+    int iteration = 0;
+    while (iteration < test_loop_times)
+    {
+        random_roi();
+
+        OCL_OFF(cv::Laplacian(src_roi, dst_roi, -1, ksize, kernelScale, 10, borderType));
+        OCL_ON(cv::Laplacian(usrc_roi, udst_roi, -1, ksize, kernelScale, 10, borderType));
+
+        Near();
+
+        ++iteration;
+    }
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// Sobel
+
+typedef FilterTestBase SobelTest;
+
+// Runs cv::Sobel on the Mat ROI (inside OCL_OFF) and on the UMat ROI
+// (inside OCL_ON) with identical arguments, then compares the two results
+// with Near(). The fixture's `size` parameter carries the derivative orders.
+OCL_TEST_P(SobelTest, Mat)
+{
+    const int dx = size.width;
+    const int dy = size.height;
+    const double kernelScale = param;
+
+    int iteration = 0;
+    while (iteration < test_loop_times)
+    {
+        random_roi();
+
+        OCL_OFF(cv::Sobel(src_roi, dst_roi, -1, dx, dy, ksize, kernelScale, /* delta */0, borderType));
+        OCL_ON(cv::Sobel(usrc_roi, udst_roi, -1, dx, dy, ksize, kernelScale, /* delta */0, borderType));
+
+        Near();
+
+        ++iteration;
+    }
+}
+
typedef Deriv3x3_cols16_rows2_Base Sobel3x3_cols16_rows2;
OCL_TEST_P(Sobel3x3_cols16_rows2, Mat)
Bool(),
Values(1))); // not used
+OCL_INSTANTIATE_TEST_CASE_P(Filter, Laplacian3_cols16_rows2, Combine( // parameter grid for the Laplacian3_cols16_rows2 tests
+ Values((MatType)CV_8UC1), // source type: CV_8UC1 only
+ Values(3), // kernel size
+ Values(Size(0, 0)), // dx, dy slot of the shared fixture; the Laplacian test does not read it
+ FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED, // border modes under test (presumably without BORDER_WRAP and BORDER_ISOLATED, per the macro name)
+ Values(1.0, 0.2, 3.0), // kernel scale
+ Bool(), // NOTE(review): meaning of this flag is not visible here - confirm against the fixture
+ Values(1))); // not used
+
OCL_INSTANTIATE_TEST_CASE_P(Filter, SobelTest, Combine(
FILTER_TYPES,
Values(3, 5), // kernel size