integrate NMS (Dollar's criteria)

author marina.kolpakova <marina.kolpakova@itseez.com>

Mon, 26 Nov 2012 13:22:47 +0000 (17:22 +0400)

committer marina.kolpakova <marina.kolpakova@itseez.com>

Mon, 26 Nov 2012 13:22:47 +0000 (17:22 +0400)
author marina.kolpakova <marina.kolpakova@itseez.com>
Mon, 26 Nov 2012 13:22:47 +0000 (17:22 +0400)
committer marina.kolpakova <marina.kolpakova@itseez.com>
Mon, 26 Nov 2012 13:22:47 +0000 (17:22 +0400)
diff --git a/modules/gpu/src/cuda/icf-sc.cu b/modules/gpu/src/cuda/icf-sc.cu

index 5334441..12b511f 100644 (file)
--- a/modules/gpu/src/cuda/icf-sc.cu
+++ b/modules/gpu/src/cuda/icf-sc.cu
@@ -88,19 +88,23 @@ namespace icf {
          return (w < 0 || h < 0)? 0.f : (float)(w * h);
      }
  
-    __global__ void overlap(const uint* n, const Detection* detections, uchar* overlaps)
+    texture<uint4,  cudaTextureType2D, cudaReadModeElementType> tdetections;
+
+    __global__ void overlap(const uint* n, uchar* overlaps)
      {
          const int idx = threadIdx.x;
          const int total = *n;
  
-        for (int i = idx; i < total; i += 192)
+        for (int i = idx + 1; i < total; i += 192)
          {
-            const Detection& a = detections[i];
+            const uint4 _a = tex2D(tdetections, i, 0);
+            const Detection& a = *((Detection*)(&_a));
              bool excluded = false;
  
              for (int j = i + 1; j < total; ++j)
              {
-                const Detection& b = detections[j];
+                const uint4 _b = tex2D(tdetections, j, 0);
+                const Detection& b = *((Detection*)(&_b));
                  float ovl = overlapArea(a, b) / ::min(a.w * a.h, b.w * b.h);
  
                  if (ovl > 0.65f)
@@ -115,7 +119,7 @@ namespace icf {
          }
      }
  
-    __global__ void collect(const uint* n, const Detection* detections, uchar* overlaps)
+    __global__ void collect(const uint* n, uchar* overlaps, uint* ctr, uint4* suppressed)
      {
          const int idx = threadIdx.x;
          const int total = *n;
@@ -124,19 +128,24 @@ namespace icf {
          {
              if (!overlaps[i])
              {
-                const Detection& det = detections[i];
-                // printf("%d: %d %d %d %d %f\n", i, det.x, det.y, det.w, det.h, det.confidence );
+                int oidx = atomicInc(ctr, 50);
+                suppressed[oidx] = tex2D(tdetections, i + 1, 0);
              }
          }
      }
  
-    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections)
+    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed)
      {
          int block = 192;
          int grid = 1;
  
-        overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (Detection*)objects.ptr(0), (uchar*)overlaps.ptr(0));
-        collect<<<grid, block>>>((uint*)ndetections.ptr(0), (Detection*)objects.ptr(0), (uchar*)overlaps.ptr(0));
+        cudaChannelFormatDesc desc = cudaCreateChannelDesc<uint4>();
+        size_t offset;
+        cudaSafeCall( cudaBindTexture2D(&offset, tdetections, objects.data, desc, objects.cols / sizeof(uint4), objects.rows, objects.step));
+
+        overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0));
+        collect<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0), (uint*)suppressed.ptr(0), ((uint4*)suppressed.ptr(0)) + 1);
+
          // if (!stream)
          {
              cudaSafeCall( cudaGetLastError());
diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp

index 35bd72e..5324e2e 100644 (file)
--- a/modules/gpu/src/softcascade.cpp
+++ b/modules/gpu/src/softcascade.cpp
@@ -86,7 +86,7 @@ namespace icf {
      void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
          const int fw, const int fh, const int bins, cudaStream_t stream);
  
-    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections);
+    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed);
  }
  
  namespace imgproc {
@@ -312,6 +312,7 @@ struct cv::gpu::SCascade::Fields
          hogluv.setTo(cv::Scalar::all(0));
  
          overlaps.create(1, 5000, CV_8UC1);
+        suppressed.create(1, sizeof(Detection) * 51, CV_8UC1);
  
          return true;
      }
@@ -447,7 +448,9 @@ public:
      {
          ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
          overlaps.setTo(0);
-        device::icf::suppress(objects, overlaps, ndetections);
+        suppressed.setTo(0);
+
+        device::icf::suppress(objects, overlaps, ndetections, suppressed);
          // std::cout << cv::Mat(overlaps) << std::endl;
      }
  
@@ -484,6 +487,9 @@ public:
      // used for area overlap computing during
      GpuMat overlaps;
  
+    // used for suppression
+    GpuMat suppressed;
+
      // Cascade from xml
      GpuMat octaves;
      GpuMat stages;
@@ -525,7 +531,6 @@ bool cv::gpu::SCascade::load(const FileNode& fn)
  void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, Stream& s) const
  {
      CV_Assert(fields);
-
      const GpuMat colored = image.getGpuMat();
  
      // only color images are supperted
@@ -545,6 +550,7 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
          colored.copyTo(flds.hogluv);
      }
  
+    GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows));
  
      GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
      objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1));
@@ -552,8 +558,11 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
  
      flds.detect(rois, tmp, objects, stream);
  
-    // if (rejCriteria != NO_REJECT)
-    flds.suppress(tmp, objects);
+    if (rejCriteria != NO_REJECT)
+    {
+        flds.suppress(tmp, objects);
+        flds.suppressed.copyTo(spr);
+    }
  }
  
  void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const
author	marina.kolpakova <marina.kolpakova@itseez.com>
	Mon, 26 Nov 2012 13:22:47 +0000 (17:22 +0400)
committer	marina.kolpakova <marina.kolpakova@itseez.com>
	Mon, 26 Nov 2012 13:22:47 +0000 (17:22 +0400)
modules/gpu/src/cuda/icf-sc.cu		patch \| blob \| history
modules/gpu/src/softcascade.cpp		patch \| blob \| history