Fixed some bugs in GPU matrix reductions; moved the <functional> include from initialization.cpp into precomp.hpp

author Alexey Spizhevoy <no@email>

Fri, 21 Jan 2011 07:43:11 +0000 (07:43 +0000)

committer Alexey Spizhevoy <no@email>

Fri, 21 Jan 2011 07:43:11 +0000 (07:43 +0000)
author Alexey Spizhevoy <no@email>
Fri, 21 Jan 2011 07:43:11 +0000 (07:43 +0000)
committer Alexey Spizhevoy <no@email>
Fri, 21 Jan 2011 07:43:11 +0000 (07:43 +0000)
diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu

index 0e45fa4..9e0e7f8 100644 (file)
--- a/modules/gpu/src/cuda/matrix_reductions.cu
+++ b/modules/gpu/src/cuda/matrix_reductions.cu
@@ -328,13 +328,13 @@ namespace cv { namespace gpu { namespace mathfunc
          __shared__ best_type smaxval[nthreads];\r
          \r
          uint tid = threadIdx.y * blockDim.x + threadIdx.x;\r
-        uint idx = min(tid, gridDim.x * gridDim.y - 1);\r
+        uint idx = min(tid, size - 1);\r
  \r
          sminval[tid] = minval[idx];\r
          smaxval[tid] = maxval[idx];\r
          __syncthreads();\r
  \r
-               findMinMaxInSmem<nthreads, best_type>(sminval, smaxval, tid);\r
+        findMinMaxInSmem<nthreads, best_type>(sminval, smaxval, tid);\r
  \r
          if (tid == 0) \r
          {\r
@@ -428,7 +428,7 @@ namespace cv { namespace gpu { namespace mathfunc
  \r
      // Returns required buffer sizes\r
      void getBufSizeRequired(int cols, int rows, int elem_size, int& b1cols, \r
-                               int& b1rows, int& b2cols, int& b2rows)\r
+                            int& b1rows, int& b2cols, int& b2rows)\r
      {\r
          dim3 threads, grid;\r
          estimateThreadCfg(cols, rows, threads, grid);\r
@@ -623,7 +623,7 @@ namespace cv { namespace gpu { namespace mathfunc
  \r
      template <typename T>\r
      void minMaxLocCaller(const DevMem2D src, double* minval, double* maxval, \r
-                            int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf)\r
+                         int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf)\r
      {\r
          dim3 threads, grid;\r
          estimateThreadCfg(src.cols, src.rows, threads, grid);\r
@@ -671,7 +671,7 @@ namespace cv { namespace gpu { namespace mathfunc
          __shared__ uint smaxloc[nthreads];\r
  \r
          uint tid = threadIdx.y * blockDim.x + threadIdx.x;\r
-        uint idx = min(tid, gridDim.x * gridDim.y - 1);\r
+        uint idx = min(tid, size - 1);\r
  \r
          sminval[tid] = minval[idx];\r
          smaxval[tid] = maxval[idx];\r
@@ -679,7 +679,7 @@ namespace cv { namespace gpu { namespace mathfunc
          smaxloc[tid] = maxloc[idx];\r
          __syncthreads();\r
  \r
-               findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);\r
+        findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);\r
  \r
          if (tid == 0) \r
          {\r
@@ -1150,7 +1150,7 @@ namespace cv { namespace gpu { namespace mathfunc
  \r
          const int tid = threadIdx.y * blockDim.x + threadIdx.x;\r
  \r
-        DstType res = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<DstType>::all(0);\r
+        DstType res = tid < size ? result[tid] : VecTraits<DstType>::all(0);\r
          smem[tid] = res.x;\r
          smem[tid + nthreads] = res.y;\r
          __syncthreads();\r
@@ -1262,7 +1262,7 @@ namespace cv { namespace gpu { namespace mathfunc
  \r
          const int tid = threadIdx.y * blockDim.x + threadIdx.x;\r
  \r
-        DstType res = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<DstType>::all(0);\r
+        DstType res = tid < size ? result[tid] : VecTraits<DstType>::all(0);\r
          smem[tid] = res.x;\r
          smem[tid + nthreads] = res.y;\r
          smem[tid + 2 * nthreads] = res.z;\r
@@ -1384,7 +1384,7 @@ namespace cv { namespace gpu { namespace mathfunc
  \r
          const int tid = threadIdx.y * blockDim.x + threadIdx.x;\r
  \r
-        DstType res = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<DstType>::all(0);\r
+        DstType res = tid < size ? result[tid] : VecTraits<DstType>::all(0);\r
          smem[tid] = res.x;\r
          smem[tid + nthreads] = res.y;\r
          smem[tid + 2 * nthreads] = res.z;\r
diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp

index bee31e5..0104bd0 100644 (file)
--- a/modules/gpu/src/initialization.cpp
+++ b/modules/gpu/src/initialization.cpp
@@ -41,7 +41,6 @@
  //M*/\r
  \r
  #include "precomp.hpp"\r
-#include <functional>\r
  \r
  using namespace cv;\r
  using namespace cv::gpu;\r
diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp

index 732d36b..abfc894 100644 (file)
--- a/modules/gpu/src/matrix_reductions.cpp
+++ b/modules/gpu/src/matrix_reductions.cpp
@@ -276,11 +276,11 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
              minMaxMaskCaller<double> };\r
  \r
      CV_Assert(src.channels() == 1);\r
+\r
      CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));\r
  \r
-    bool double_ok = hasGreaterOrEqualVersion(1, 3) && \r
-                     hasNativeDoubleSupport(getDevice());\r
-    CV_Assert(src.type() != CV_64F || double_ok);\r
+    CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) && \r
+                                       hasNativeDoubleSupport(getDevice())));\r
  \r
      double minVal_; if (!minVal) minVal = &minVal_;\r
      double maxVal_; if (!maxVal) maxVal = &maxVal_;\r
@@ -375,11 +375,11 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
              minMaxLocMaskCaller<double> };\r
  \r
      CV_Assert(src.channels() == 1);\r
+\r
      CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));\r
  \r
-    bool double_ok = hasGreaterOrEqualVersion(1, 3) && \r
-                     hasNativeDoubleSupport(getDevice());\r
-    CV_Assert(src.type() != CV_64F || double_ok);\r
+    CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) && \r
+                                       hasNativeDoubleSupport(getDevice())));\r
  \r
      double minVal_; if (!minVal) minVal = &minVal_;\r
      double maxVal_; if (!maxVal) maxVal = &maxVal_;\r
@@ -388,7 +388,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
  \r
      Size valbuf_size, locbuf_size;\r
      getBufSizeRequired(src.cols, src.rows, src.elemSize(), valbuf_size.width, \r
-                          valbuf_size.height, locbuf_size.width, locbuf_size.height);\r
+                       valbuf_size.height, locbuf_size.width, locbuf_size.height);\r
      ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);\r
      ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);\r
  \r
@@ -459,9 +459,8 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
  \r
      CV_Assert(src.channels() == 1);\r
  \r
-    bool double_ok = hasGreaterOrEqualVersion(1, 3) && \r
-                     hasNativeDoubleSupport(getDevice());\r
-    CV_Assert(src.type() != CV_64F || double_ok);\r
+    CV_Assert(src.type() != CV_64F || (hasGreaterOrEqualVersion(1, 3) && \r
+                                       hasNativeDoubleSupport(getDevice())));\r
  \r
      Size buf_size;\r
      getBufSizeRequired(src.cols, src.rows, buf_size.width, buf_size.height);\r
diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp

index 03acb4a..e0f2e00 100644 (file)
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@@ -57,6 +57,7 @@
  #include <sstream>\r
  #include <exception>\r
  #include <iterator>\r
+#include <functional>\r
  \r
  #include "opencv2/gpu/gpu.hpp"\r
  #include "opencv2/imgproc/imgproc.hpp"\r
diff --git a/tests/gpu/src/arithm.cpp b/tests/gpu/src/arithm.cpp

index 8fea540..3bc0e73 100644 (file)
--- a/tests/gpu/src/arithm.cpp
+++ b/tests/gpu/src/arithm.cpp
@@ -49,7 +49,7 @@ using namespace std;
  using namespace gpu;\r
  \r
  #define CHECK(pred, err) if (!(pred)) { \\r
-    ts->printf(CvTS::LOG, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \\r
+    ts->printf(CvTS::CONSOLE, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \\r
      ts->set_failed_test_info(err); \\r
      return; }\r
  \r
author	Alexey Spizhevoy <no@email>
	Fri, 21 Jan 2011 07:43:11 +0000 (07:43 +0000)
committer	Alexey Spizhevoy <no@email>
	Fri, 21 Jan 2011 07:43:11 +0000 (07:43 +0000)
modules/gpu/src/cuda/matrix_reductions.cu		patch \| blob \| history
modules/gpu/src/initialization.cpp		patch \| blob \| history
modules/gpu/src/matrix_reductions.cpp		patch \| blob \| history
modules/gpu/src/precomp.hpp		patch \| blob \| history
tests/gpu/src/arithm.cpp		patch \| blob \| history