result storing: atomic based

author Marina Kolpakova <no@email>

Mon, 2 Jul 2012 08:08:17 +0000 (08:08 +0000)

committer Marina Kolpakova <no@email>

Mon, 2 Jul 2012 08:08:17 +0000 (08:08 +0000)
author Marina Kolpakova <no@email>
Mon, 2 Jul 2012 08:08:17 +0000 (08:08 +0000)
committer Marina Kolpakova <no@email>
Mon, 2 Jul 2012 08:08:17 +0000 (08:08 +0000)
diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp

index e1a4554..556d92d 100644 (file)
--- a/modules/gpu/src/cascadeclassifier.cpp
+++ b/modules/gpu/src/cascadeclassifier.cpp
@@ -272,7 +272,7 @@ namespace cv { namespace gpu { namespace device
  {\r
      namespace lbp\r
      {\r
-        void classifyStump(const DevMem2Db mstages,\r
+        int classifyStump(const DevMem2Db mstages,\r
                             const int nstages,\r
                             const DevMem2Di mnodes,\r
                             const DevMem2Df mleaves,\r
@@ -298,13 +298,10 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
  \r
      const int defaultObjSearchNum = 100;\r
  \r
-    // if( !objects.empty() && objects.depth() == CV_32S)\r
-    //     objects.reshape(4, 1);\r
-    // else\r
-    //     objects.create(1 , defaultObjSearchNum, CV_32SC4);\r
-\r
-    // temp solution\r
-    objects.create(image.rows, image.cols, CV_32SC4);\r
+    if( !objects.empty() && objects.depth() == CV_32S)\r
+        objects.reshape(4, 1);\r
+    else\r
+        objects.create(1 , defaultObjSearchNum, CV_32SC4);\r
  \r
      if (maxObjectSize == cv::Size())\r
          maxObjectSize = image.size();\r
@@ -333,8 +330,9 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
  \r
          int step = (factor <= 2.) + 1;\r
  \r
-        cv::gpu::device::lbp::classifyStump(stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat,\r
+        int res = cv::gpu::device::lbp::classifyStump(stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat,\r
          integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, scaleFactor, step, subsetSize, objects);\r
+        std::cout  << res << "Results:    " << cv::Mat(objects).row(0).colRange(0, res) << std::endl;\r
      }\r
      // TODO: reject levels\r
  \r
diff --git a/modules/gpu/src/cuda/lbp.cu b/modules/gpu/src/cuda/lbp.cu

index 7133920..b997947 100644 (file)
--- a/modules/gpu/src/cuda/lbp.cu
+++ b/modules/gpu/src/cuda/lbp.cu
@@ -47,11 +47,11 @@ namespace cv { namespace gpu { namespace device
      namespace lbp
      {
          __global__ void lbp_classify_stump(Stage* stages, int nstages, ClNode* nodes, const float* leaves, const int* subsets, const uchar4* features,
-            const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects)
+            const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, unsigned int* n)
          {
              int y = threadIdx.x * scale;
              int x = blockIdx.x * scale;
-
+            *n = 0;
              int i = 0;
  
              int current_node = 0;
@@ -88,12 +88,11 @@ namespace cv { namespace gpu { namespace device
              rect.z = roundf(clWidth);
              rect.w = roundf(clHeight);
  
-            if(i >= 19)
-                printf( "GPU detected [%d, %d] - [%d, %d]\n", rect.x, rect.y, rect.z, rect.w);
-
+            int res = atomicInc(n, 1000);
+            objects(0, res) = rect;
          }
  
-        void classifyStump(const DevMem2Db mstages, const int nstages, const DevMem2Di mnodes, const DevMem2Df mleaves, const DevMem2Di msubsets, const DevMem2Db mfeatures,
+        int classifyStump(const DevMem2Db mstages, const int nstages, const DevMem2Di mnodes, const DevMem2Df mleaves, const DevMem2Di msubsets, const DevMem2Db mfeatures,
                             const DevMem2Di integral, const int workWidth, const int workHeight, const int clWidth, const int clHeight, float scale, int step, int subsetSize,
                             DevMem2D_<int4> objects)
          {
@@ -106,9 +105,12 @@ namespace cv { namespace gpu { namespace device
              const float* leaves = mleaves.ptr();
              const int* subsets = msubsets.ptr();
              const uchar4* features = (uchar4*)(mfeatures.ptr());
-
+            unsigned int * n, *h_n = new unsigned int[1];
+            cudaMalloc(&n, sizeof(int));
              lbp_classify_stump<<<blocks, threads>>>(stages, nstages, nodes, leaves, subsets, features, integral,
-                workWidth, workHeight, clWidth, clHeight, scale, step, subsetSize, objects);
+                workWidth, workHeight, clWidth, clHeight, scale, step, subsetSize, objects, n);
+            cudaMemcpy(h_n, n, sizeof(int), cudaMemcpyDeviceToHost);
+            return *h_n;
          }
      }
  }}}
 \ No newline at end of file
author	Marina Kolpakova <no@email>
	Mon, 2 Jul 2012 08:08:17 +0000 (08:08 +0000)
committer	Marina Kolpakova <no@email>
	Mon, 2 Jul 2012 08:08:17 +0000 (08:08 +0000)
modules/gpu/src/cascadeclassifier.cpp		patch \| blob \| history
modules/gpu/src/cuda/lbp.cu		patch \| blob \| history