Added 3-channel support to cv::boxFilter, cv::blur, cv::sqrBoxFilter
author: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Tue, 18 Mar 2014 20:33:01 +0000 (00:33 +0400)
committer: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Mon, 24 Mar 2014 10:31:42 +0000 (14:31 +0400)
modules/imgproc/src/opencl/boxFilter.cl
modules/imgproc/src/smooth.cpp
modules/imgproc/test/ocl/test_boxfilter.cpp

index 986fc78..00eec46 100644 (file)
 #endif
 #endif
 
+// Pixel access helpers, selected by the channel count `cn`.
+//   loadpix(addr)       - read one source pixel starting at byte address `addr`
+//   storepix(val, addr) - write one destination pixel at byte address `addr`
+//   SRCSIZE / DSTSIZE   - byte distance between neighbouring pixels in a row
+// For cn != 3 a pixel is accessed directly through the ST / DT types.
+// For cn == 3 it is accessed with vload3 / vstore3 over the scalar element
+// types ST1 / DT1, and the pixel size is computed as cn scalar elements
+// (per the OpenCL C spec a 3-component vector type is sized like a
+// 4-component one, so sizeof(ST) would not match a packed 3-channel row).
+// Every macro body is fully parenthesized so it expands safely inside any
+// surrounding expression (e.g. `a / SRCSIZE` or `loadpix(p).x`).
+#if cn != 3
+#define loadpix(addr) (*(__global const ST *)(addr))
+#define storepix(val, addr) (*(__global DT *)(addr) = (val))
+#define SRCSIZE ((int)sizeof(ST))
+#define DSTSIZE ((int)sizeof(DT))
+#else
+#define loadpix(addr) vload3(0, (__global const ST1 *)(addr))
+#define storepix(val, addr) vstore3((val), 0, (__global DT1 *)(addr))
+#define SRCSIZE ((int)sizeof(ST1) * cn)
+#define DSTSIZE ((int)sizeof(DT1) * cn)
+#endif
+
 #ifdef BORDER_CONSTANT
 #elif defined BORDER_REPLICATE
 #define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
@@ -123,8 +135,8 @@ inline WT readSrcPixel(int2 pos, __global const uchar * srcptr, int src_step, co
     if (pos.x >= 0 && pos.y >= 0 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
 #endif
     {
-        int src_index = mad24(pos.y, src_step, pos.x * (int)sizeof(ST));
-        WT value = convertToWT(*(__global const ST *)(srcptr + src_index));
+        int src_index = mad24(pos.y, src_step, pos.x * SRCSIZE);
+        WT value = convertToWT(loadpix(srcptr + src_index));
 
         return PROCESS_ELEM(value);
     }
@@ -143,8 +155,8 @@ inline WT readSrcPixel(int2 pos, __global const uchar * srcptr, int src_step, co
 #endif
             srcCoords.x2, srcCoords.y2);
 
-        int src_index = mad24(selected_row, src_step, selected_col * (int)sizeof(ST));
-        WT value = convertToWT(*(__global const ST *)(srcptr + src_index));
+        int src_index = mad24(selected_row, src_step, selected_col * SRCSIZE);
+        WT value = convertToWT(loadpix(srcptr + src_index));
 
         return PROCESS_ELEM(value);
 #endif
@@ -180,7 +192,7 @@ __kernel void boxFilter(__global const uchar * srcptr, int src_step, int srcOffs
     sumOfCols[local_id] = tmp_sum;
     barrier(CLK_LOCAL_MEM_FENCE);
 
-    int dst_index = mad24(y, dst_step, x * (int)sizeof(DT) + dst_offset);
+    int dst_index = mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset));
     __global DT * dst = (__global DT *)(dstptr + dst_index);
 
     int sy_index = 0; // current index in data[] array
@@ -196,10 +208,11 @@ __kernel void boxFilter(__global const uchar * srcptr, int src_step, int srcOffs
                 total_sum += sumOfCols[local_id + sx - ANCHOR_X];
 
 #ifdef NORMALIZE
-            dst[0] = convertToDT((WT)(alpha) * total_sum);
+            DT dstval = convertToDT((WT)(alpha) * total_sum);
 #else
-            dst[0] = convertToDT(total_sum);
+            DT dstval = convertToDT(total_sum);
 #endif
+            storepix(dstval, dst);
         }
         barrier(CLK_LOCAL_MEM_FENCE);
 
index 0641bc8..0a657f4 100644 (file)
@@ -41,6 +41,7 @@
 //M*/
 
 #include "precomp.hpp"
+#define CV_OPENCL_RUN_ASSERT
 #include "opencl_kernels.hpp"
 
 /*
@@ -639,9 +640,12 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
     if (ddepth < 0)
         ddepth = sdepth;
 
-    if (!(cn == 1 || cn == 2 || cn == 4) || (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) ||
+    if (cn > 4 || (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) ||
         _src.offset() % esz != 0 || _src.step() % esz != 0)
+    {
+        printf("!!!!!!!!!!!!!!!!!!!!!!!\n");
         return false;
+    }
 
     if (anchor.x < 0)
         anchor.x = ksize.width / 2;
@@ -687,15 +691,17 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
             return false;
 
         char cvt[2][50];
-        String opts = format("-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s "
-                             "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s",
+        String opts = format("-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s"
+                             " -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s"
+                             " -D ST1=%s -D DT1=%s -D cn=%d",
                              BLOCK_SIZE_X, BLOCK_SIZE_Y, ocl::typeToStr(type), ocl::typeToStr(CV_MAKE_TYPE(ddepth, cn)),
                              ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
                              ocl::convertTypeStr(wdepth, ddepth, cn, cvt[0]),
                              ocl::convertTypeStr(sdepth, wdepth, cn, cvt[1]),
                              anchor.x, anchor.y, ksize.width, ksize.height, borderMap[borderType],
                              isolated ? " -D BORDER_ISOLATED" : "", doubleSupport ? " -D DOUBLE_SUPPORT" : "",
-                             normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : "");
+                             normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : "",
+                             ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), cn);
 
         localsize[0] = BLOCK_SIZE_X;
         globalsize[0] = DIVUP(size.width, BLOCK_SIZE_X - (ksize.width - 1)) * BLOCK_SIZE_X;
index e1e936a..63f4ebf 100644 (file)
@@ -133,7 +133,7 @@ OCL_TEST_P(SqrBoxFilter, Mat)
 OCL_INSTANTIATE_TEST_CASE_P(ImageProc, BoxFilter,
                             Combine(
                                 Values(CV_8U, CV_16U, CV_16S, CV_32S, CV_32F),
-                                Values(1, 2, 4),
+                                OCL_ALL_CHANNELS,
                                 Values((BorderType)BORDER_CONSTANT,
                                        (BorderType)BORDER_REPLICATE,
                                        (BorderType)BORDER_REFLECT,
@@ -146,7 +146,7 @@ OCL_INSTANTIATE_TEST_CASE_P(ImageProc, BoxFilter,
 OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SqrBoxFilter,
                             Combine(
                                 Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
-                                Values(1, 2, 4),
+                                OCL_ALL_CHANNELS,
                                 Values((BorderType)BORDER_CONSTANT,
                                        (BorderType)BORDER_REPLICATE,
                                        (BorderType)BORDER_REFLECT,