From b1c248fcc9eaa24baabf55fde7fcff096d62dcb2 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Fri, 31 May 2013 10:53:52 +0800 Subject: [PATCH] Fix ocl::filter2D. In the current implementation, this function only works when the anchor point is in the kernel center and the kernel size is either 3x3 or 5x5. --- modules/ocl/include/opencv2/ocl/ocl.hpp | 2 ++ modules/ocl/src/filtering.cpp | 10 +++++++--- modules/ocl/src/opencl/filtering_laplacian.cl | 12 ++++++------ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 785248c..01b0f72 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -689,6 +689,8 @@ namespace cv } //! applies non-separable 2D linear filter to the image + // Note, at the moment this function only works when anchor point is in the kernel center + // and kernel size supported is either 3x3 or 5x5; otherwise the function will fail to output valid result CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index 56a70ae..f35a26e 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -645,7 +645,11 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols)); args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows)); - openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth); + const int buffer_size = 100; + char opt_buffer [buffer_size] = ""; + sprintf(opt_buffer, "-DANCHOR=%d -DANX=%d -DANY=%d", ksize.width, anchor.x, anchor.y); + + openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth, opt_buffer); } Ptr 
cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize, Point anchor, int borderType) @@ -656,7 +660,7 @@ Ptr cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const oclMat gpu_krnl; int nDivisor; - normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true); + normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, false); normalizeAnchor(anchor, ksize); return Ptr(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)], @@ -1172,7 +1176,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy)); args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data)); - openCLExecuteKernel2(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option, CLFLUSH); + openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option); } Ptr cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype) diff --git a/modules/ocl/src/opencl/filtering_laplacian.cl b/modules/ocl/src/opencl/filtering_laplacian.cl index 96a2f51..8535eb1 100644 --- a/modules/ocl/src/opencl/filtering_laplacian.cl +++ b/modules/ocl/src/opencl/filtering_laplacian.cl @@ -82,9 +82,9 @@ ////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////Macro for define elements number per thread///////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// -#define ANCHOR 3 -#define ANX 1 -#define ANY 1 +//#define ANCHOR 3 +//#define ANX 1 +//#define ANY 1 #define ROWS_PER_GROUP 4 #define ROWS_PER_GROUP_BITS 2 @@ -185,7 +185,7 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x for(int i = 0; i < ANCHOR; i++) { -#pragma unroll 3 
+#pragma unroll for(int j = 0; j < ANCHOR; j++) { if(dst_rows_index < dst_rows_end) @@ -295,7 +295,7 @@ __kernel void filter2D_C1_D5(__global float *src, int src_step, int src_offset_x for(int i = 0; i < ANCHOR; i++) { -#pragma unroll 3 +#pragma unroll for(int j = 0; j < ANCHOR; j++) { if(dst_rows_index < dst_rows_end) @@ -410,7 +410,7 @@ __kernel void filter2D_C4_D0(__global uchar4 *src, int src_step, int src_offset_ for(int i = 0; i < ANCHOR; i++) { -#pragma unroll 3 +#pragma unroll for(int j = 0; j < ANCHOR; j++) { if(dst_rows_index < dst_rows_end) -- 2.7.4