oclDst.download(dst);
- SANITY_CHECK(dst);
+ SANITY_CHECK(dst, 1e-3);
}
else if (RUN_PLAIN_IMPL)
{
TEST_CYCLE() cv::Sobel(src, dst, -1, dx, dy);
- SANITY_CHECK(dst);
+ SANITY_CHECK(dst, 1e-3);
}
else
OCL_PERF_ELSE
Mat src(srcSize, type), dst(srcSize, type);
declare.in(src, WARMUP_RNG).out(dst);
- const double eps = src.depth() == CV_8U ? 1 + DBL_EPSILON : 3e-4;
+ const double eps = src.depth() == CV_8U ? 1 + DBL_EPSILON : 5e-4;
if (RUN_OCL_IMPL)
{
option += " -D KERNEL_MATRIX_X=";
for(int i=0; i<row_kernel.rows; i++)
- option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &row_kernel.at<float>(i) ) );
+ option += cv::format("DIG(0x%x)", *reinterpret_cast<const unsigned int*>( &row_kernel.at<float>(i) ) );
option += "0x0";
option += " -D KERNEL_MATRIX_Y=";
for(int i=0; i<col_kernel.rows; i++)
- option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &col_kernel.at<float>(i) ) );
+ option += cv::format("DIG(0x%x)", *reinterpret_cast<const unsigned int*>( &col_kernel.at<float>(i) ) );
option += "0x0";
switch(src.type())
//if image size is non-degenerate and large enough
//and if filter support is reasonable to satisfy larger local memory requirements,
//then we can use single pass routine to avoid extra runtime calls overhead
- if( clCxt && clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) &&
+ if( clCxt &&
rowKernel.rows <= 21 && columnKernel.rows <= 21 &&
(rowKernel.rows & 1) == 1 && (columnKernel.rows & 1) == 1 &&
imgSize.width > optimizedSepFilterLocalSize + (rowKernel.rows>>1) &&
// DST(_x,_y): element _x of destination row _y. The row base address is
// Dst + DstOffset + (_y)*DstPitch, reinterpreted as a global DSTTYPE pointer.
// NOTE(review): DstOffset and DstPitch are presumably byte quantities - confirm
// against the declaration of Dst, which is not visible here.
#define DST(_x,_y) (((global DSTTYPE*)(Dst+DstOffset+(_y)*DstPitch))[_x])
// DIG(a) expands to "a," (the value followed by a comma). The host emits every
// filter coefficient as DIG(0x...) and then appends a final plain 0x0, so
// KERNEL_MATRIX_X / KERNEL_MATRIX_Y expand to a comma-separated initializer list
// with one extra trailing zero entry - presumably so that the -D build option
// string itself never has to contain a literal comma.
+#define DIG(a) a,
+
// Horizontal and vertical filter kernels.
// They are supplied by the host as compile-time macros to avoid runtime overhead.
// The host formats the raw bit pattern of each float coefficient as a hex
// integer, which is why the table is declared as uint rather than float.
__constant uint mat_kernelX[] = {KERNEL_MATRIX_X};