added ROI support to ocl::CLAHE

author Ilya Lavrenov <ilya.lavrenov@itseez.com>

Fri, 18 Oct 2013 12:31:52 +0000 (16:31 +0400)

committer Ilya Lavrenov <ilya.lavrenov@itseez.com>

Fri, 18 Oct 2013 12:31:52 +0000 (16:31 +0400)
author Ilya Lavrenov <ilya.lavrenov@itseez.com>
Fri, 18 Oct 2013 12:31:52 +0000 (16:31 +0400)
committer Ilya Lavrenov <ilya.lavrenov@itseez.com>
Fri, 18 Oct 2013 12:31:52 +0000 (16:31 +0400)
diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp

index 916d25c..98ec14f 100644 (file)
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -1314,6 +1314,8 @@ namespace cv
                  args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX ));
                  args.push_back( std::make_pair( sizeof(cl_int), (void *)&clipLimit ));
                  args.push_back( std::make_pair( sizeof(cl_float), (void *)&lutScale ));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset ));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
  
                  String kernelName = "calcLut";
                  size_t localThreads[3]  = { 32, 8, 1 };
@@ -1333,7 +1335,7 @@ namespace cv
              }
  
              static void transform(const oclMat &src, oclMat &dst, const oclMat &lut,
-                const int tilesX, const int tilesY, const cv::Size tileSize)
+                const int tilesX, const int tilesY, const Size & tileSize)
              {
                  cl_int2 tile_size;
                  tile_size.s[0] = tileSize.width;
@@ -1351,6 +1353,9 @@ namespace cv
                  args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size ));
                  args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX ));
                  args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesY ));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset ));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&lut.offset ));
  
                  size_t localThreads[3]  = { 32, 8, 1 };
                  size_t globalThreads[3] = { src.cols, src.rows, 1 };
@@ -1419,9 +1424,10 @@ namespace cv
                  }
                  else
                  {
-                    cv::ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar());
+                    ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0,
+                                            tilesX_ - (src.cols % tilesX_), BORDER_REFLECT_101, Scalar::all(0));
  
-                    tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
+                    tileSize = Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
                      srcForLut = srcExt_;
                  }
  
diff --git a/modules/ocl/src/opencl/imgproc_clahe.cl b/modules/ocl/src/opencl/imgproc_clahe.cl

index 49c7096..55692ae 100644 (file)
--- a/modules/ocl/src/opencl/imgproc_clahe.cl
+++ b/modules/ocl/src/opencl/imgproc_clahe.cl
@@ -53,12 +53,8 @@ int calc_lut(__local int* smem, int val, int tid)
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (tid == 0)
-    {
          for (int i = 1; i < 256; ++i)
-        {
              smem[i] += smem[i - 1];
-        }
-    }
      barrier(CLK_LOCAL_MEM_FENCE);
  
      return smem[tid];
@@ -71,69 +67,51 @@ void reduce(volatile __local int* smem, int val, int tid)
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (tid < 128)
-    {
          smem[tid] = val += smem[tid + 128];
-    }
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (tid < 64)
-    {
          smem[tid] = val += smem[tid + 64];
-    }
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (tid < 32)
-    {
          smem[tid] += smem[tid + 32];
-    }
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (tid < 16)
-    {
          smem[tid] += smem[tid + 16];
-    }
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (tid < 8)
-    {
          smem[tid] += smem[tid + 8];
-    }
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (tid < 4)
-    {
          smem[tid] += smem[tid + 4];
-    }
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (tid < 2)
-    {
          smem[tid] += smem[tid + 2];
-    }
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (tid < 1)
-    {
          smem[256] = smem[tid] + smem[tid + 1];
-    }
      barrier(CLK_LOCAL_MEM_FENCE);
  }
+
  #else
+
  void reduce(__local volatile int* smem, int val, int tid)
  {
      smem[tid] = val;
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (tid < 128)
-    {
          smem[tid] = val += smem[tid + 128];
-    }
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (tid < 64)
-    {
          smem[tid] = val += smem[tid + 64];
-    }
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (tid < 32)
@@ -141,12 +119,17 @@ void reduce(__local volatile int* smem, int val, int tid)
          smem[tid] += smem[tid + 32];
  #if WAVE_SIZE < 32
      } barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 16) {
+
+    if (tid < 16)
+    {
  #endif
          smem[tid] += smem[tid + 16];
  #if WAVE_SIZE < 16
-    } barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 8) {
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (tid < 8)
+    {
  #endif
          smem[tid] += smem[tid + 8];
          smem[tid] += smem[tid + 4];
@@ -159,7 +142,8 @@ void reduce(__local volatile int* smem, int val, int tid)
  __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
                        const int srcStep, const int dstStep,
                        const int2 tileSize, const int tilesX,
-                      const int clipLimit, const float lutScale)
+                      const int clipLimit, const float lutScale,
+                      const int src_offset, const int dst_offset)
  {
      __local int smem[512];
  
@@ -173,25 +157,21 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
  
      for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1))
      {
-        __global const uchar* srcPtr = src + mad24( ty * tileSize.y + i,
-                                                    srcStep, tx * tileSize.x );
+        __global const uchar* srcPtr = src + mad24(ty * tileSize.y + i, srcStep, tx * tileSize.x + src_offset);
          for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0))
          {
              const int data = srcPtr[j];
              atomic_inc(&smem[data]);
          }
      }
-
      barrier(CLK_LOCAL_MEM_FENCE);
  
      int tHistVal = smem[tid];
-
      barrier(CLK_LOCAL_MEM_FENCE);
  
      if (clipLimit > 0)
      {
          // clip histogram bar
-
          int clipped = 0;
          if (tHistVal > clipLimit)
          {
@@ -200,7 +180,6 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
          }
  
          // find number of overall clipped samples
-
          reduce(smem, clipped, tid);
          barrier(CLK_LOCAL_MEM_FENCE);
  #ifdef CPU
@@ -229,7 +208,7 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
  
      const int lutVal = calc_lut(smem, tHistVal, tid);
      uint ires = (uint)convert_int_rte(lutScale * lutVal);
-    lut[(ty * tilesX + tx) * dstStep + tid] =
+    lut[(ty * tilesX + tx) * dstStep + tid + dst_offset] =
          convert_uchar(clamp(ires, (uint)0, (uint)255));
  }
  
@@ -239,7 +218,8 @@ __kernel void transform(__global __const uchar * src,
                          const int srcStep, const int dstStep, const int lutStep,
                          const int cols, const int rows,
                          const int2 tileSize,
-                        const int tilesX, const int tilesY)
+                        const int tilesX, const int tilesY,
+                        const int src_offset, const int dst_offset, int lut_offset)
  {
      const int x = get_global_id(0);
      const int y = get_global_id(1);
@@ -261,15 +241,15 @@ __kernel void transform(__global __const uchar * src,
      tx1 = max(tx1, 0);
      tx2 = min(tx2, tilesX - 1);
  
-    const int srcVal = src[mad24(y, srcStep, x)];
+    const int srcVal = src[mad24(y, srcStep, x + src_offset)];
  
      float res = 0;
  
-    res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (1.0f - ya));
-    res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (1.0f - ya));
-    res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (ya));
-    res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (ya));
+    res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (1.0f - ya));
+    res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (1.0f - ya));
+    res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (ya));
+    res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (ya));
  
      uint ires = (uint)convert_int_rte(res);
-    dst[mad24(y, dstStep, x)] = convert_uchar(clamp(ires, (uint)0, (uint)255));
+    dst[mad24(y, dstStep, x + dst_offset)] = convert_uchar(clamp(ires, (uint)0, (uint)255));
  }
author	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Fri, 18 Oct 2013 12:31:52 +0000 (16:31 +0400)
committer	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Fri, 18 Oct 2013 12:31:52 +0000 (16:31 +0400)
modules/ocl/src/imgproc.cpp		patch \| blob \| history
modules/ocl/src/opencl/imgproc_clahe.cl		patch \| blob \| history