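Removed needless divUp rounding of globalThreads at call sites and consolidated the duplicate divUp helpers into a single definition in ocl.hpp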

author Ilya Lavrenov <ilya.lavrenov@itseez.com>

Mon, 16 Sep 2013 11:11:56 +0000 (15:11 +0400)

committer Ilya Lavrenov <ilya.lavrenov@itseez.com>

Mon, 16 Sep 2013 11:48:30 +0000 (15:48 +0400)
author Ilya Lavrenov <ilya.lavrenov@itseez.com>
Mon, 16 Sep 2013 11:11:56 +0000 (15:11 +0400)
committer Ilya Lavrenov <ilya.lavrenov@itseez.com>
Mon, 16 Sep 2013 11:48:30 +0000 (15:48 +0400)
diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp

index de7cac2..f8c1ad7 100644 (file)
--- a/modules/nonfree/src/surf.ocl.cpp
+++ b/modules/nonfree/src/surf.ocl.cpp
@@ -82,12 +82,6 @@ namespace cv
      }
  }
  
-
-static inline size_t divUp(size_t total, size_t grain)
-{
-    return (total + grain - 1) / grain;
-}
-
  static inline int calcSize(int octave, int layer)
  {
      /* Wavelet size at first layer of first octave. */
diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp

index c117d0b..42ac758 100644 (file)
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -1887,6 +1887,11 @@ namespace cv
              oclMat temp4;
              oclMat temp5;
          };
+
+        static inline size_t divUp(size_t total, size_t grain)
+        {
+            return (total + grain - 1) / grain;
+        }
      }
  }
  #if defined _MSC_VER && _MSC_VER >= 1200
diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp

index 819c013..0cc803d 100644 (file)
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@@ -108,13 +108,6 @@ namespace cv
      }
  }
  
-//////////////////////////////////////////////////////////////////////////
-//////////////////common/////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////
-inline int divUp(int total, int grain)
-{
-    return (total + grain - 1) / grain;
-}
  //////////////////////////////////////////////////////////////////////////////
  /////////////////////// add subtract multiply divide /////////////////////////
  //////////////////////////////////////////////////////////////////////////////
@@ -150,10 +143,7 @@ void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
      int cols = divUp(dst.cols * channels + offset_cols, vector_length);
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -226,10 +216,7 @@ static void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
      int cols = divUp(dst.cols + offset_cols, vector_length);
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -338,10 +325,7 @@ void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst,
      int cols = divUp(dst.cols + offset_cols, vector_length);
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -397,10 +381,7 @@ static void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelN
      int cols = divUp(dst.cols * channels + offset_cols, vector_length);
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -515,10 +496,8 @@ static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, str
      int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
      int cols = divUp(dst.cols  + offset_cols, vector_length);
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
+
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
      args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
@@ -945,10 +924,7 @@ static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kern
      int rows = divUp(dst.rows, 2);
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -993,10 +969,7 @@ static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kern
      int rows = isVertical ?  divUp(dst.rows, 2) : dst.rows;
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -1156,10 +1129,7 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernel
      int depth = dst.depth();
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(dst.cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
  
      vector<pair<size_t , const void *> > args;
      args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows ));
@@ -1201,13 +1171,9 @@ static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src
      size_t vector_length = 1;
      int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
      int cols = divUp(dst.cols * channels + offset_cols, vector_length);
-    int rows = dst.rows;
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
  
      vector<pair<size_t , const void *> > args;
      args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
@@ -1252,13 +1218,9 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat
      size_t vector_length = 1;
      int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
      int cols = divUp(dst.cols * channels + offset_cols, vector_length);
-    int rows = dst.rows;
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -1283,15 +1245,9 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angle
      Angle.create(x.size(), x.type());
      string kernelName = angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians";
      if(angleInDegrees)
-    {
          arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase);
-        //cout<<"1"<<endl;
-    }
      else
-    {
          arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase);
-        //cout<<"2"<<endl;
-    }
  }
  
  //////////////////////////////////////////////////////////////////////////////
@@ -1311,13 +1267,9 @@ static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, o
      int depth = src1.depth();
  
      int cols = src1.cols * channels;
-    int rows = src1.rows;
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, src1.rows, 1 };
  
      int tmp = angleInDegrees ? 1 : 0;
      vector<pair<size_t , const void *> > args;
@@ -1333,7 +1285,7 @@ static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, o
      args.push_back( make_pair( sizeof(cl_mem), (void *)&dst_cart.data ));
      args.push_back( make_pair( sizeof(cl_int), (void *)&dst_cart.step ));
      args.push_back( make_pair( sizeof(cl_int), (void *)&dst_cart.offset ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
      args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
      args.push_back( make_pair( sizeof(cl_int), (void *)&tmp ));
  
@@ -1369,10 +1321,7 @@ static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &d
      int rows = src2.rows;
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, rows, 1 };
  
      int tmp = angleInDegrees ? 1 : 0;
      vector<pair<size_t , const void *> > args;
@@ -1632,10 +1581,7 @@ static void bitwise_run(const oclMat &src1, oclMat &dst, string kernelName, cons
      int cols = divUp(dst.cols * channels + offset_cols, vector_length);
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -1678,10 +1624,7 @@ void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string ker
      int cols = divUp(dst.cols * channels + offset_cols, vector_length);
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -1739,10 +1682,7 @@ static void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst,
      int cols = divUp(dst.cols + offset_cols, vector_length);
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -1800,10 +1740,7 @@ void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst,
      int cols = divUp(dst.cols + offset_cols, vector_length);
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -2096,10 +2033,7 @@ static void transpose_run(const oclMat &src, oclMat &dst, string kernelName)
      int cols = divUp(src.cols + offset_cols, vector_length);
  
      size_t localThreads[3]  = { TILE_DIM, BLOCK_ROWS, 1 };
-    size_t globalThreads[3] = { divUp(cols, TILE_DIM) *localThreads[0],
-                                divUp(src.rows, TILE_DIM) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, src.rows, 1 };
  
      vector<pair<size_t , const void *> > args;
      args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
@@ -2154,10 +2088,7 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
      int cols = divUp(dst.cols * channels + offset_cols, vector_length);
  
      size_t localThreads[3]  = { 256, 1, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1};
  
      int dst_step1 = dst.cols * dst.elemSize();
      int src1_step = (int) src1.step;
@@ -2220,10 +2151,7 @@ void cv::ocl::magnitudeSqr(const oclMat &src1, const oclMat &src2, oclMat &dst)
      int cols = divUp(dst.cols * channels + offset_cols, vector_length);
  
      size_t localThreads[3]  = { 256, 1, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -2268,10 +2196,7 @@ void cv::ocl::magnitudeSqr(const oclMat &src1, oclMat &dst)
      int cols = divUp(dst.cols * channels + offset_cols, vector_length);
  
      size_t localThreads[3]  = { 256, 1, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(dst.rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, dst.rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
@@ -2303,10 +2228,7 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string
      int rows = dst.rows;
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, rows, 1 };
  
      int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp

index 82bb01b..4c7b988 100644 (file)
--- a/modules/ocl/src/canny.cpp
+++ b/modules/ocl/src/canny.cpp
@@ -360,14 +360,13 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
      vector< pair<size_t, const void *> > args;
      size_t localThreads[3]  = {128, 1, 1};
  
-#define DIVUP(a, b) ((a)+(b)-1)/(b)
      int count_i[1] = {0};
      while(count > 0)
      {
          openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
  
          args.clear();
-        size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
+        size_t globalThreads[3] = {std::min(count, 65535u) * 128, divUp(count, 65535), 1};
          args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
          args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
          args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
@@ -382,7 +381,6 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
          openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
          std::swap(st1, st2);
      }
-#undef DIVUP
  }
  
  void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols)
diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp

index c055798..e252d85 100644 (file)
--- a/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
@@ -70,20 +70,10 @@ extern const char *filtering_adaptive_bilateral;
  
  namespace
  {
-inline int divUp(int total, int grain)
-{
-    return (total + grain - 1) / grain;
-}
-}
-
-namespace
-{
  inline void normalizeAnchor(int &anchor, int ksize)
  {
      if (anchor < 0)
-    {
          anchor = ksize >> 1;
-    }
  
      CV_Assert(0 <= anchor && anchor < ksize);
  }
@@ -97,9 +87,7 @@ inline void normalizeAnchor(Point &anchor, const Size &ksize)
  inline void normalizeROI(Rect &roi, const Size &ksize, const Point &anchor, const Size &src_size)
  {
      if (roi == Rect(0, 0, -1, -1))
-    {
          roi = Rect(0, 0, src_size.width, src_size.height);
-    }
  
      CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
      CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1));
@@ -112,10 +100,7 @@ inline void normalizeKernel(const Mat &kernel, oclMat &gpu_krnl, int type = CV_8
      int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1;
  
      if (nDivisor)
-    {
          *nDivisor = scale;
-    }
-
      Mat temp(kernel.size(), type);
      kernel.convertTo(temp, type, scale);
      Mat cont_krnl = temp.reshape(1, 1);
@@ -125,9 +110,7 @@ inline void normalizeKernel(const Mat &kernel, oclMat &gpu_krnl, int type = CV_8
          int count = cont_krnl.cols >> 1;
  
          for (int i = 0; i < count; ++i)
-        {
              std::swap(cont_krnl.at<int>(0, i), cont_krnl.at<int>(0, cont_krnl.cols - 1 - i));
-        }
      }
  
      gpu_krnl.upload(cont_krnl);
@@ -627,8 +610,6 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, const oclMat &mat_kernel
      int localWidth = localThreads[0] + paddingPixels;
      int localHeight = localThreads[1] + paddingPixels;
  
-    // 260 = divup((localThreads[0] + filterWidth * 2), 4) * 4
-    // 6   = (ROWS_PER_GROUP_WHICH_IS_4 + filterWidth * 2)
      size_t localMemSize = ksize_3x3 ? 260 * 6 * src.elemSize() : (localWidth * localHeight) * src.elemSize();
  
      int vector_lengths[4][7] = {{4, 4, 4, 4, 4, 4, 4},
@@ -1713,4 +1694,4 @@ void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize
  
      openCLExecuteKernel(Context::getContext(), &filtering_adaptive_bilateral, kernelName,
          globalThreads, localThreads, args, cn, depth, build_options);
-}
-\ No newline at end of file
+}
diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp

index 4aafb47..5587282 100644 (file)
--- a/modules/ocl/src/hog.cpp
+++ b/modules/ocl/src/hog.cpp
@@ -124,11 +124,6 @@ namespace cv
  
  using namespace ::cv::ocl::device;
  
-static inline int divUp(int total, int grain)
-{
-    return (total + grain - 1) / grain;
-}
-
  cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_,
                                        Size cell_size_, int nbins_, double win_sigma_,
                                        double threshold_L2hys_, bool gamma_correction_, int nlevels_)
@@ -1671,7 +1666,8 @@ void cv::ocl::device::hog::compute_hists(int nbins,
      {
          openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
              localThreads, args, -1, -1, "-D CPU");
-    }else
+    }
+    else
      {
          cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
          int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp

index ff509fb..2ed786f 100644 (file)
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -1518,11 +1518,6 @@ namespace cv
          // CLAHE
          namespace clahe
          {
-            inline int divUp(int total, int grain)
-            {
-                return (total + grain - 1) / grain * grain;
-            }
-
              static void calcLut(const oclMat &src, oclMat &dst,
                  const int tilesX, const int tilesY, const cv::Size tileSize,
                  const int clipLimit, const float lutScale)
@@ -1546,9 +1541,7 @@ namespace cv
                  size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
                  bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
                  if (is_cpu)
-                {
                      openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)" -D CPU");
-                }
                  else
                  {
                      cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &imgproc_clahe, kernelName);
@@ -1583,7 +1576,7 @@ namespace cv
  
                  String kernelName = "transform";
                  size_t localThreads[3]  = { 32, 8, 1 };
-                size_t globalThreads[3] = { divUp(src.cols, localThreads[0]), divUp(src.rows, localThreads[1]), 1 };
+                size_t globalThreads[3] = { src.cols, src.rows, 1 };
  
                  openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1);
              }
@@ -1801,10 +1794,7 @@ namespace cv
      }
  }
  //////////////////////////////////convolve////////////////////////////////////////////////////
-inline int divUp(int total, int grain)
-{
-    return (total + grain - 1) / grain;
-}
+
  static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const char **kernelString)
  {
      CV_Assert(src.depth() == CV_32FC1);
@@ -1826,10 +1816,7 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st
      int rows = dst.rows;
  
      size_t localThreads[3]  = { 16, 16, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                divUp(rows, localThreads[1]) *localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, rows, 1 };
  
      vector<pair<size_t , const void *> > args;
      args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp

index 564b403..8f5fae3 100644 (file)
--- a/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
@@ -285,11 +285,6 @@ namespace cv
              return 0;
          }
  
-        inline int divUp(int total, int grain)
-        {
-            return (total + grain - 1) / grain;
-        }
-
          int getDevice(std::vector<Info> &oclinfo, int devicetype)
          {
              //TODO: cache oclinfo vector
@@ -707,11 +702,10 @@ namespace cv
  
              if ( localThreads != NULL)
              {
-                globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
-                globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
-                globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
+                globalThreads[0] = alignSize(globalThreads[0], localThreads[0]);
+                globalThreads[1] = alignSize(globalThreads[1], localThreads[1]);
+                globalThreads[2] = alignSize(globalThreads[2], localThreads[2]);
  
-                //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
                  cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
              }
              for(size_t i = 0; i < args.size(); i ++)
@@ -742,10 +736,6 @@ namespace cv
              execute_time = (double)(end_time - start_time) / (1000 * 1000);
              total_time = (double)(end_time - queue_time) / (1000 * 1000);
  
-            // cout << setiosflags(ios::left) << setw(15) << execute_time;
-            // cout << setiosflags(ios::left) << setw(15) << total_time - execute_time;
-            // cout << setiosflags(ios::left) << setw(15) << total_time << endl;
-
              total_execute_time += execute_time;
              total_kernel_time += total_time;
              clReleaseEvent(event);
diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp

index 82189b7..58369e9 100644 (file)
--- a/modules/ocl/src/matrix_operations.cpp
+++ b/modules/ocl/src/matrix_operations.cpp
@@ -307,11 +307,6 @@ void cv::ocl::oclMat::download(cv::Mat &m) const
      m.adjustROI(-ofs.y, ofs.y + rows - wholerows, -ofs.x, ofs.x + cols - wholecols);
  }
  
-/////////////////////common//////////////////////////////////////
-inline int divUp(int total, int grain)
-{
-    return (total + grain - 1) / grain;
-}
  ///////////////////////////////////////////////////////////////////////////
  ////////////////////////////////// CopyTo /////////////////////////////////
  ///////////////////////////////////////////////////////////////////////////
@@ -331,11 +326,7 @@ static void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask
      char compile_option[32];
      sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());
      size_t localThreads[3] = {16, 16, 1};
-    size_t globalThreads[3];
-
-    globalThreads[0] = divUp(dst.cols, localThreads[0]) * localThreads[0];
-    globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1];
-    globalThreads[2] = 1;
+    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
  
      int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
      int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();
diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp

index 2966d53..fc94e2f 100644 (file)
--- a/modules/ocl/src/mcwutil.cpp
+++ b/modules/ocl/src/mcwutil.cpp
@@ -71,12 +71,6 @@ namespace cv
  {
      namespace ocl
      {
-
-        inline int divUp(int total, int grain)
-        {
-            return (total + grain - 1) / grain;
-        }
-
          // provide additional methods for the user to interact with the command queue after a task is fired
          static void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
                                     size_t localThreads[3],  vector< pair<size_t, const void *> > &args, int channels,
diff --git a/modules/ocl/src/optical_flow_farneback.cpp b/modules/ocl/src/optical_flow_farneback.cpp

index e622446..618a637 100644 (file)
--- a/modules/ocl/src/optical_flow_farneback.cpp
+++ b/modules/ocl/src/optical_flow_farneback.cpp
@@ -73,11 +73,6 @@ oclMat gKer;
  
  float ig[4];
  
-inline int divUp(int total, int grain)
-{
-    return (total + grain - 1) / grain;
-}
-
  inline void setGaussianBlurKernel(const float *c_gKer, int ksizeHalf)
  {
      cv::Mat t_gKer(1, ksizeHalf + 1, CV_32FC1, const_cast<float *>(c_gKer));
@@ -88,7 +83,7 @@ static void gaussianBlurOcl(const oclMat &src, int ksizeHalf, oclMat &dst)
  {
      string kernelName("gaussianBlur");
      size_t localThreads[3] = { 256, 1, 1 };
-    size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], src.rows, 1 };
+    size_t globalThreads[3] = { src.cols, src.rows, 1 };
      int smem_size = (localThreads[0] + 2*ksizeHalf) * sizeof(float);
  
      CV_Assert(dst.size() == src.size());
@@ -138,10 +133,7 @@ static void updateMatricesOcl(const oclMat &flowx, const oclMat &flowy, const oc
  {
      string kernelName("updateMatrices");
      size_t localThreads[3] = { 32, 8, 1 };
-    size_t globalThreads[3] = { divUp(flowx.cols, localThreads[0]) * localThreads[0],
-                                divUp(flowx.rows, localThreads[1]) * localThreads[1],
-                                1
-                              };
+    size_t globalThreads[3] = { flowx.cols, flowx.rows, 1 };
  
      std::vector< std::pair<size_t, const void *> > args;
      args.push_back(std::make_pair(sizeof(cl_mem), (void *)&M.data));
@@ -166,7 +158,7 @@ static void boxFilter5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
      string kernelName("boxFilter5");
      int height = src.rows / 5;
      size_t localThreads[3] = { 256, 1, 1 };
-    size_t globalThreads[3] = { divUp(src.cols, localThreads[0]) * localThreads[0], height, 1 };
+    size_t globalThreads[3] = { src.cols, height, 1 };
      int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
  
      std::vector< std::pair<size_t, const void *> > args;
@@ -188,10 +180,7 @@ static void updateFlowOcl(const oclMat &M, oclMat &flowx, oclMat &flowy)
      string kernelName("updateFlow");
      int cols = divUp(flowx.cols, 4);
      size_t localThreads[3] = { 32, 8, 1 };
-    size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
-                                divUp(flowx.rows, localThreads[1]) * localThreads[0],
-                                1
-                              };
+    size_t globalThreads[3] = { cols, flowx.rows, 1 };
  
      std::vector< std::pair<size_t, const void *> > args;
      args.push_back(std::make_pair(sizeof(cl_mem), (void *)&flowx.data));
@@ -211,9 +200,8 @@ static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
  {
      string kernelName("gaussianBlur5");
      int height = src.rows / 5;
-    int width = src.cols;
      size_t localThreads[3] = { 256, 1, 1 };
-    size_t globalThreads[3] = { divUp(width, localThreads[0]) * localThreads[0], height, 1 };
+    size_t globalThreads[3] = { src.cols, height, 1 };
      int smem_size = (localThreads[0] + 2*ksizeHalf) * 5 * sizeof(float);
  
      std::vector< std::pair<size_t, const void *> > args;
@@ -222,7 +210,7 @@ static void gaussianBlur5Ocl(const oclMat &src, int ksizeHalf, oclMat &dst)
      args.push_back(std::make_pair(sizeof(cl_mem), (void *)&gKer.data));
      args.push_back(std::make_pair(smem_size, (void *)NULL));
      args.push_back(std::make_pair(sizeof(cl_int), (void *)&height));
-    args.push_back(std::make_pair(sizeof(cl_int), (void *)&width));
+    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.cols));
      args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
      args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
      args.push_back(std::make_pair(sizeof(cl_int), (void *)&ksizeHalf));
diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp

index de3d270..79bd0f0 100644 (file)
--- a/modules/ocl/src/split_merge.cpp
+++ b/modules/ocl/src/split_merge.cpp
@@ -73,61 +73,6 @@ namespace cv
      {
          namespace split_merge
          {
-            ///////////////////////////////////////////////////////////
-            ///////////////common/////////////////////////////////////
-            /////////////////////////////////////////////////////////
-            inline int divUp(int total, int grain)
-            {
-                return (total + grain - 1) / grain;
-            }
-            ////////////////////////////////////////////////////////////////////////////
-            ////////////////////merge//////////////////////////////////////////////////
-            ////////////////////////////////////////////////////////////////////////////
-            // static void merge_vector_run_no_roi(const oclMat *mat_src, size_t n, oclMat &mat_dst)
-            // {
-            //     Context  *clCxt = mat_dst.clCxt;
-            //     int channels = mat_dst.oclchannels();
-            //     int depth = mat_dst.depth();
-
-            //     string kernelName = "merge_vector";
-
-            //     int indexes[4][7] = {{0, 0, 0, 0, 0, 0, 0},
-            //         {4, 4, 2, 2, 1, 1, 1},
-            //         {4, 4, 2, 2 , 1, 1, 1},
-            //         {4, 4, 2, 2, 1, 1, 1}
-            //     };
-
-            //     size_t index = indexes[channels - 1][mat_dst.depth()];
-            //     int    cols = divUp(mat_dst.cols, index);
-            //     size_t localThreads[3]  = { 64, 4, 1 };
-            //     size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-            //                                 divUp(mat_dst.rows, localThreads[1]) *localThreads[1],
-            //                                 1
-            //                               };
-
-            //     vector<pair<size_t , const void *> > args;
-            //     args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst.rows));
-            //     args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
-            //     args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst.data));
-            //     args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst.step));
-            //     args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[0].data));
-            //     args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[0].step));
-            //     args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[1].data));
-            //     args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[1].step));
-            //     if(n >= 3)
-            //     {
-            //         args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[2].data));
-            //         args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].step));
-            //     }
-            //     if(n >= 4)
-            //     {
-            //         args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[3].data));
-            //         args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[3].step));
-            //     }
-
-            //     openCLExecuteKernel(clCxt, &merge_mat, kernelName, globalThreads, localThreads, args, channels, depth);
-            // }
-
              static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst)
              {
                  if(!mat_dst.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_dst.type() == CV_64F)
@@ -153,10 +98,7 @@ namespace cv
                  int cols = divUp(mat_dst.cols + offset_cols, vector_length);
  
                  size_t localThreads[3]  = { 64, 4, 1 };
-                size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                            divUp(mat_dst.rows, localThreads[1]) *localThreads[1],
-                                            1
-                                          };
+                size_t globalThreads[3] = { cols, mat_dst.rows, 1 };
  
                  int dst_step1 = mat_dst.cols * mat_dst.elemSize();
                  vector<pair<size_t , const void *> > args;
@@ -176,10 +118,6 @@ namespace cv
                      args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].step));
                      args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].offset));
  
-                    // if channel == 3, then the matrix will convert to channel =4
-                    //if(n == 3)
-                    //   args.push_back( make_pair( sizeof(cl_int), (void *)&offset_cols));
-
                      if(n == 3)
                      {
                          args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[2].data));
@@ -229,53 +167,6 @@ namespace cv
                  mat_dst.create(size, CV_MAKETYPE(depth, total_channels));
                  merge_vector_run(mat_src, n, mat_dst);
              }
-            ////////////////////////////////////////////////////////////////////////////////////////////////////
-            //////////////////////////////////////split/////////////////////////////////////////////////////////////
-            //////////////////////////////////////////////////////////////////////////////////////////////////
-            // static void split_vector_run_no_roi(const oclMat &mat_src, oclMat *mat_dst)
-            // {
-            //     Context  *clCxt = mat_src.clCxt;
-            //     int channels = mat_src.oclchannels();
-            //     int depth = mat_src.depth();
-
-            //     string kernelName = "split_vector";
-
-            //     int indexes[4][7] = {{0, 0, 0, 0, 0, 0, 0},
-            //         {8, 8, 8, 8, 4, 4, 2},
-            //         {8, 8, 8, 8 , 4, 4, 4},
-            //         {4, 4, 2, 2, 1, 1, 1}
-            //     };
-
-            //     size_t index = indexes[channels - 1][mat_dst[0].depth()];
-            //     int cols = divUp(mat_src.cols, index);
-            //     size_t localThreads[3]  = { 64, 4, 1 };
-            //     size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-            //                                 divUp(mat_src.rows, localThreads[1]) *localThreads[1],
-            //                                 1
-            //                               };
-
-            //     vector<pair<size_t , const void *> > args;
-            //     args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data));
-            //     args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.step));
-            //     args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.rows));
-            //     args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
-            //     args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[0].data));
-            //     args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[0].step));
-            //     args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[1].data));
-            //     args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[1].step));
-            //     if(channels >= 3)
-            //     {
-            //         args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[2].data));
-            //         args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[2].step));
-            //     }
-            //     if(channels >= 4)
-            //     {
-            //         args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[3].data));
-            //         args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[3].step));
-            //     }
-
-            //     openCLExecuteKernel(clCxt, &split_mat, kernelName, globalThreads, localThreads, args, channels, depth);
-            // }
              static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst)
              {
  
@@ -311,9 +202,7 @@ namespace cv
                              : divUp(mat_src.cols + max_offset_cols, vector_length);
  
                  size_t localThreads[3]  = { 64, 4, 1 };
-                size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
-                                            divUp(mat_src.rows, localThreads[1]) *localThreads[1], 1
-                                          };
+                size_t globalThreads[3] = { cols, mat_src.rows, 1 };
  
                  int dst_step1 = mat_dst[0].cols * mat_dst[0].elemSize();
                  vector<pair<size_t , const void *> > args;
diff --git a/modules/ocl/src/stereo_csbp.cpp b/modules/ocl/src/stereo_csbp.cpp

index 1ae70c0..b119ead 100644 (file)
--- a/modules/ocl/src/stereo_csbp.cpp
+++ b/modules/ocl/src/stereo_csbp.cpp
@@ -96,13 +96,6 @@ namespace cv
      {
          namespace stereoCSBP
          {
-            //////////////////////////////////////////////////////////////////////////
-            //////////////////////////////common////////////////////////////////////
-            ////////////////////////////////////////////////////////////////////////
-            static inline int divUp(int total, int grain)
-            {
-                return (total + grain - 1) / grain;
-            }
              static string get_kernel_name(string kernel_name, int data_type)
              {
                  stringstream idxStr;
@@ -132,10 +125,7 @@ namespace cv
  
                  //size_t blockSize = 256;
                  size_t localThreads[]  = {32, 8 ,1};
-                size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
-                    divUp(h, localThreads[1]) *localThreads[1],
-                    1
-                };
+                size_t globalThreads[] = { w, h, 1 };
  
                  int cdisp_step1 = msg_step * h;
                  openCLVerifyKernel(clCxt, kernel,  localThreads);
@@ -177,7 +167,7 @@ namespace cv
                  const int threadsNum = 256;
                  //size_t blockSize = threadsNum;
                  size_t localThreads[3]  = {win_size, 1, threadsNum / win_size};
-                size_t globalThreads[3] = {w *localThreads[0],
+                size_t globalThreads[3] = { w *localThreads[0],
                      h * divUp(rthis.ndisp, localThreads[2]) *localThreads[1], 1 * localThreads[2]
                  };
  
@@ -222,10 +212,7 @@ namespace cv
  
                  //size_t blockSize = 256;
                  size_t localThreads[]  = {32, 8 ,1};
-                size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
-                    divUp(h, localThreads[1]) *localThreads[1],
-                    1
-                };
+                size_t globalThreads[] = { w, h, 1 };
  
                  int disp_step = msg_step * h;
                  openCLVerifyKernel(clCxt, kernel, localThreads);
@@ -257,10 +244,7 @@ namespace cv
  
                  //size_t blockSize = 256;
                  size_t localThreads[]  = {32, 8, 1};
-                size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
-                    divUp(h, localThreads[1]) *localThreads[1],
-                    1
-                };
+                size_t globalThreads[] = { w, h, 1 };
  
                  int disp_step = msg_step * h;
                  openCLVerifyKernel(clCxt, kernel, localThreads);
@@ -291,14 +275,10 @@ namespace cv
                      init_data_cost_reduce_caller(left, right, temp, rthis, msg_step, h, w, level);
  
                  if(rthis.use_local_init_data_cost == true)
-                {
                      get_first_initial_local_caller(data_cost_selected, disp_selected_pyr, temp, rthis, h, w, nr_plane, msg_step);
-                }
                  else
-                {
                      get_first_initial_global_caller(data_cost_selected, disp_selected_pyr, temp, rthis, h, w,
                          nr_plane, msg_step);
-                }
              }
  
              ///////////////////////////////////////////////////////////////////////////////////////////////////
@@ -317,12 +297,8 @@ namespace cv
  
                  cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereocsbp, kernelName);
  
-                //size_t blockSize = 256;
-                size_t localThreads[]  = {32, 8, 1};
-                size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
-                    divUp(h, localThreads[1]) *localThreads[1],
-                    1
-                };
+                size_t localThreads[]  = { 32, 8, 1 };
+                size_t globalThreads[] = { w, h, 1 };
  
                  int disp_step1 = msg_step1 * h;
                  int disp_step2 = msg_step2 * h2;
@@ -366,8 +342,8 @@ namespace cv
  
                  const size_t threadsNum = 256;
                  //size_t blockSize = threadsNum;
-                size_t localThreads[3]  = {win_size, 1, threadsNum / win_size};
-                size_t globalThreads[3] = {w *localThreads[0],
+                size_t localThreads[3]  = { win_size, 1, threadsNum / win_size };
+                size_t globalThreads[3] = { w *localThreads[0],
                      h * divUp(nr_plane, localThreads[2]) *localThreads[1], 1 * localThreads[2]
                  };
  
@@ -431,10 +407,7 @@ namespace cv
  
                  //size_t blockSize = 256;
                  size_t localThreads[]  = {32, 8, 1};
-                size_t globalThreads[] = {divUp(w, localThreads[0]) *localThreads[0],
-                    divUp(h, localThreads[1]) *localThreads[1],
-                    1
-                };
+                size_t globalThreads[] = { w, h, 1 };
  
                  int disp_step1 = msg_step1 * h;
                  int disp_step2 = msg_step2 * h2;
@@ -535,10 +508,7 @@ namespace cv
  
                  //size_t blockSize = 256;
                  size_t localThreads[]  = {32, 8, 1};
-                size_t globalThreads[] = {divUp(disp.cols, localThreads[0]) *localThreads[0],
-                    divUp(disp.rows, localThreads[1]) *localThreads[1],
-                    1
-                };
+                size_t globalThreads[] = { disp.cols, disp.rows, 1 };
  
                  int step_size = disp.step / disp.elemSize();
                  int disp_step = disp.rows * msg_step;
diff --git a/modules/ocl/src/stereobm.cpp b/modules/ocl/src/stereobm.cpp

index 151a7ea..8195346 100644 (file)
--- a/modules/ocl/src/stereobm.cpp
+++ b/modules/ocl/src/stereobm.cpp
@@ -96,10 +96,7 @@ static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterC
  #define N_DISPARITIES 8
  #define ROWSperTHREAD 21
  #define BLOCK_W 128
-static inline int divUp(int total, int grain)
-{
-    return (total + grain - 1) / grain;
-}
+
  ////////////////////////////////////////////////////////////////////////////
  ///////////////////////////////stereoBM_GPU////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////
@@ -117,11 +114,10 @@ static void stereo_bm(const oclMat &left, const oclMat &right,  oclMat &disp,
      size_t local_mem_size = (N_DISPARITIES * (BLOCK_W + 2 * winsz2)) *
                              sizeof(cl_uint);
      //size_t blockSize = 1;
-    size_t localThreads[]  = { BLOCK_W, 1,1};
-    size_t globalThreads[] = { divUp(left.cols - maxdisp - 2 * winsz2, BLOCK_W) *BLOCK_W,
+    size_t localThreads[]  = { BLOCK_W, 1, 1 };
+    size_t globalThreads[] = { left.cols - maxdisp - 2 * winsz2,
                                 divUp(left.rows - 2 * winsz2, ROWSperTHREAD),
-                               1
-                             };
+                               1 };
  
      std::vector< std::pair<size_t, const void *> > args;
      args.push_back(std::make_pair(sizeof(cl_mem), (void *)&left.data));
@@ -151,10 +147,9 @@ static void postfilter_textureness(oclMat &left, int winSize,
  
      size_t blockSize = 1;
      size_t localThreads[]  = { BLOCK_W, blockSize ,1};
-    size_t globalThreads[] = { divUp(left.cols, BLOCK_W) *BLOCK_W,
+    size_t globalThreads[] = { left.cols,
                                 divUp(left.rows, 2 * ROWSperTHREAD),
-                               1
-                             };
+                               1 };
  
      size_t local_mem_size = (localThreads[0] + localThreads[0] + (winSize / 2) * 2) * sizeof(float);
  
diff --git a/modules/ocl/src/stereobp.cpp b/modules/ocl/src/stereobp.cpp

index 4a326b8..fe91360 100644 (file)
--- a/modules/ocl/src/stereobp.cpp
+++ b/modules/ocl/src/stereobp.cpp
@@ -104,10 +104,7 @@ namespace cv
              {
                  openCLFree(cl_con_struct);
              }
-            static inline int divUp(int total, int grain)
-            {
-                return (total + grain - 1) / grain;
-            }
+
              /////////////////////////////////////////////////////////////////////////////
              ///////////////////////////comp data////////////////////////////////////////
              /////////////////////////////////////////////////////////////////////////
author	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Mon, 16 Sep 2013 11:11:56 +0000 (15:11 +0400)
committer	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Mon, 16 Sep 2013 11:48:30 +0000 (15:48 +0400)
modules/nonfree/src/surf.ocl.cpp		patch \| blob \| history
modules/ocl/include/opencv2/ocl/ocl.hpp		patch \| blob \| history
modules/ocl/src/arithm.cpp		patch \| blob \| history
modules/ocl/src/canny.cpp		patch \| blob \| history
modules/ocl/src/filtering.cpp		patch \| blob \| history
modules/ocl/src/hog.cpp		patch \| blob \| history
modules/ocl/src/imgproc.cpp		patch \| blob \| history
modules/ocl/src/initialization.cpp		patch \| blob \| history
modules/ocl/src/matrix_operations.cpp		patch \| blob \| history
modules/ocl/src/mcwutil.cpp		patch \| blob \| history
modules/ocl/src/optical_flow_farneback.cpp		patch \| blob \| history
modules/ocl/src/split_merge.cpp		patch \| blob \| history
modules/ocl/src/stereo_csbp.cpp		patch \| blob \| history
modules/ocl/src/stereobm.cpp		patch \| blob \| history
modules/ocl/src/stereobp.cpp		patch \| blob \| history