A little optimization on ocl/pyrdown, ocl/canny
authoryao <bitwangyaoyao@gmail.com>
Thu, 6 Sep 2012 06:39:20 +0000 (14:39 +0800)
committeryao <bitwangyaoyao@gmail.com>
Thu, 6 Sep 2012 06:39:20 +0000 (14:39 +0800)
modules/ocl/src/canny.cpp
modules/ocl/src/kernels/pyr_down.cl
modules/ocl/test/test_canny.cpp

index 5881a4c..4fd5a3f 100644 (file)
@@ -66,7 +66,7 @@ namespace cv
     }
 }
 
-cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(dy_)
+cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(dy_), counter(NULL)
 {
     CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
 
@@ -102,6 +102,10 @@ void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
 
     float counter_f [1] = { 0 };
     int err = 0;
+    if(counter)
+    {
+        openCLFree(counter);
+    }
     counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(float), counter_f, &err );
     openCLSafeCall(err);
 }
@@ -322,15 +326,11 @@ void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int ro
     args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
     args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
 
-#if CALCMAP_FIXED
+
     size_t globalThreads[3] = {cols, rows, 1};
     string kernelName = "calcMap";
     size_t localThreads[3]  = {16, 16, 1};
-#else
-    size_t globalThreads[3] = {cols, rows, 1};
-    string kernelName = "calcMap_2";
-    size_t localThreads[3]  = {256, 1, 1};
-#endif
+
     openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
 }
 
index 38b4ec7..19d631e 100644 (file)
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Dachuan Zhao, dachuan@multicorewareinc.com
+//    Yao Wang, bitwangyaoyao@gmail.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -118,81 +119,19 @@ uchar4 round_uchar4_float4(float4 v)
     return round_uchar4_int4(iv); 
 }
 
-
-
-
-int idx_row_low(int y, int last_row)
-{
-       if(y < 0)
-       {
-               y = -y;
-       }
-    return y % (last_row + 1);
-}
-
-int idx_row_high(int y, int last_row) 
-{
-       int i;
-       int j;
-       if(last_row - y < 0)
-       {
-               i = (y - last_row);
-       }
-       else
-       {
-               i = (last_row - y);
-       }
-       if(last_row - i < 0)
-       {
-               j = i - last_row;
-       }
-       else
-       {
-               j = last_row - i;
-       }
-    return j % (last_row + 1);
-}
+#define IDX_ROW_HIGH(y,last_row) (abs_diff((int)abs_diff(last_row,y),last_row) % ((last_row)+1))
+#define IDX_ROW_LOW(y,last_row) (abs(y) % ((last_row) + 1))
+#define IDX_COL_HIGH(x,last_col) abs_diff((int)abs_diff(x,last_col),last_col)
+#define IDX_COL_LOW(x,last_col) (abs(x) % ((last_col) + 1))
 
 int idx_row(int y, int last_row)
 {
-    return idx_row_low(idx_row_high(y, last_row), last_row);
-}
-
-int idx_col_low(int x, int last_col)
-{
-       if(x < 0)
-       {
-               x = -x;
-       }
-    return x % (last_col + 1);
-}
-
-int idx_col_high(int x, int last_col) 
-{
-       int i;
-       int j;
-       if(last_col - x < 0)
-       {
-               i = (x - last_col);
-       }
-       else
-       {
-               i = (last_col - x);
-       }
-       if(last_col - i < 0)
-       {
-               j = i - last_col;
-       }
-       else
-       {
-               j = last_col - i;
-       }
-    return j % (last_col + 1);
+       return IDX_ROW_LOW(IDX_ROW_HIGH(y,last_row),last_row);
 }
 
 int idx_col(int x, int last_col)
 {
-    return idx_col_low(idx_col_high(x, last_col), last_col);
+       return IDX_COL_LOW(IDX_COL_HIGH(x,last_col),last_col);
 }
 
 __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstOffset, int dstCols)
@@ -210,11 +149,11 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset
 
     sum = 0;
 
-    sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)]);
-    sum = sum + 0.25f   * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)]);
-    sum = sum + 0.375f  * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[idx_col(x, last_col)]);
-    sum = sum + 0.25f   * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)]);
-    sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)]);
+    sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)];
+    sum = sum + 0.25f   * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)];
+    sum = sum + 0.375f  * ((__global uchar*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[idx_col(x, last_col)];
+    sum = sum + 0.25f   * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)];
+    sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)];
 
     smem[2 + get_local_id(0)] = sum;
 
@@ -224,11 +163,11 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset
 
         sum = 0;
 
-        sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)]);
-               sum = sum + 0.25f   * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)]);
-               sum = sum + 0.375f  * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[idx_col(left_x, last_col)]);
-               sum = sum + 0.25f   * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)]);
-               sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)]);
+        sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)];
+               sum = sum + 0.25f   * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)];
+               sum = sum + 0.375f  * ((__global uchar*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[idx_col(left_x, last_col)];
+               sum = sum + 0.25f   * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)];
+               sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)];
 
         smem[get_local_id(0)] = sum;
     }
@@ -239,11 +178,11 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset
 
         sum = 0;
 
-        sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)]);
-               sum = sum + 0.25f   * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)]);
-               sum = sum + 0.375f  * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[idx_col(right_x, last_col)]);
-               sum = sum + 0.25f   * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)]);
-               sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)]);
+        sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)];
+               sum = sum + 0.25f   * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)];
+               sum = sum + 0.375f  * ((__global uchar*)((__global char*)srcData + idx_row(src_y    , last_row) * srcStep))[idx_col(right_x, last_col)];
+               sum = sum + 0.25f   * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)];
+               sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)];
 
         smem[4 + get_local_id(0)] = sum;
     }
@@ -288,11 +227,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffse
 
     sum = 0;
 
-       sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)]));
-       sum = sum + co2   * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)]));
-       sum = sum + co1  * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[idx_col(x, last_col)]));
-       sum = sum + co2   * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)]));
-       sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)]));
+       sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)]);
+       sum = sum + co2   * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)]);
+       sum = sum + co1  * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[idx_col(x, last_col)]);
+       sum = sum + co2   * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)]);
+       sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)]);
 
        smem[2 + get_local_id(0)] = sum;
 
@@ -302,11 +241,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffse
 
                sum = 0;
 
-               sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
-               sum = sum + co2   * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
-               sum = sum + co1  * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
-               sum = sum + co2   * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
-               sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
+               sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
+               sum = sum + co2   * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
+               sum = sum + co1  * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
+               sum = sum + co2   * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
+               sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
 
                smem[get_local_id(0)] = sum;
        }
@@ -317,11 +256,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffse
 
                sum = 0;
 
-               sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
-               sum = sum + co2   * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
-               sum = sum + co1  * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
-               sum = sum + co2   * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
-               sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
+               sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
+               sum = sum + co2   * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
+               sum = sum + co1  * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y    , last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
+               sum = sum + co2   * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
+               sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
 
                smem[4 + get_local_id(0)] = sum;
        }
index e6fb885..e728c99 100644 (file)
@@ -103,6 +103,6 @@ TEST_P(Canny, Accuracy)
        EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
 }
 
-INSTANTIATE_TEST_CASE_P(ocl_ImgProc, Canny, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
     testing::Values(AppertureSize(3), AppertureSize(5)),
     testing::Values(L2gradient(false), L2gradient(true))));