integrate NMS (Dollar's criteria)
authormarina.kolpakova <marina.kolpakova@itseez.com>
Mon, 26 Nov 2012 13:22:47 +0000 (17:22 +0400)
committermarina.kolpakova <marina.kolpakova@itseez.com>
Mon, 26 Nov 2012 13:22:47 +0000 (17:22 +0400)
modules/gpu/src/cuda/icf-sc.cu
modules/gpu/src/softcascade.cpp

index 5334441..12b511f 100644 (file)
@@ -88,19 +88,23 @@ namespace icf {
         return (w < 0 || h < 0)? 0.f : (float)(w * h);
     }
 
-    __global__ void overlap(const uint* n, const Detection* detections, uchar* overlaps)
+    texture<uint4,  cudaTextureType2D, cudaReadModeElementType> tdetections;
+
+    __global__ void overlap(const uint* n, uchar* overlaps)
     {
         const int idx = threadIdx.x;
         const int total = *n;
 
-        for (int i = idx; i < total; i += 192)
+        for (int i = idx + 1; i < total; i += 192)
         {
-            const Detection& a = detections[i];
+            const uint4 _a = tex2D(tdetections, i, 0);
+            const Detection& a = *((Detection*)(&_a));
             bool excluded = false;
 
             for (int j = i + 1; j < total; ++j)
             {
-                const Detection& b = detections[j];
+                const uint4 _b = tex2D(tdetections, j, 0);
+                const Detection& b = *((Detection*)(&_b));
                 float ovl = overlapArea(a, b) / ::min(a.w * a.h, b.w * b.h);
 
                 if (ovl > 0.65f)
@@ -115,7 +119,7 @@ namespace icf {
         }
     }
 
-    __global__ void collect(const uint* n, const Detection* detections, uchar* overlaps)
+    __global__ void collect(const uint* n, uchar* overlaps, uint* ctr, uint4* suppressed)
     {
         const int idx = threadIdx.x;
         const int total = *n;
@@ -124,19 +128,24 @@ namespace icf {
         {
             if (!overlaps[i])
             {
-                const Detection& det = detections[i];
-                // printf("%d: %d %d %d %d %f\n", i, det.x, det.y, det.w, det.h, det.confidence );
+                int oidx = atomicInc(ctr, 50);
+                suppressed[oidx] = tex2D(tdetections, i + 1, 0);
             }
         }
     }
 
-    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections)
+    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed)
     {
         int block = 192;
         int grid = 1;
 
-        overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (Detection*)objects.ptr(0), (uchar*)overlaps.ptr(0));
-        collect<<<grid, block>>>((uint*)ndetections.ptr(0), (Detection*)objects.ptr(0), (uchar*)overlaps.ptr(0));
+        cudaChannelFormatDesc desc = cudaCreateChannelDesc<uint4>();
+        size_t offset;
+        cudaSafeCall( cudaBindTexture2D(&offset, tdetections, objects.data, desc, objects.cols / sizeof(uint4), objects.rows, objects.step));
+
+        overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0));
+        collect<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0), (uint*)suppressed.ptr(0), ((uint4*)suppressed.ptr(0)) + 1);
+
         // if (!stream)
         {
             cudaSafeCall( cudaGetLastError());
index 35bd72e..5324e2e 100644 (file)
@@ -86,7 +86,7 @@ namespace icf {
     void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
         const int fw, const int fh, const int bins, cudaStream_t stream);
 
-    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections);
+    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed);
 }
 
 namespace imgproc {
@@ -312,6 +312,7 @@ struct cv::gpu::SCascade::Fields
         hogluv.setTo(cv::Scalar::all(0));
 
         overlaps.create(1, 5000, CV_8UC1);
+        suppressed.create(1, sizeof(Detection) * 51, CV_8UC1);
 
         return true;
     }
@@ -447,7 +448,9 @@ public:
     {
         ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
         overlaps.setTo(0);
-        device::icf::suppress(objects, overlaps, ndetections);
+        suppressed.setTo(0);
+
+        device::icf::suppress(objects, overlaps, ndetections, suppressed);
         // std::cout << cv::Mat(overlaps) << std::endl;
     }
 
@@ -484,6 +487,9 @@ public:
     // used for area overlap computing during
     GpuMat overlaps;
 
+    // used for suppression
+    GpuMat suppressed;
+
     // Cascade from xml
     GpuMat octaves;
     GpuMat stages;
@@ -525,7 +531,6 @@ bool cv::gpu::SCascade::load(const FileNode& fn)
 void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, Stream& s) const
 {
     CV_Assert(fields);
-
     const GpuMat colored = image.getGpuMat();
 
     // only color images are supperted
@@ -545,6 +550,7 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
         colored.copyTo(flds.hogluv);
     }
 
+    GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows));
 
     GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
     objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1));
@@ -552,8 +558,11 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
 
     flds.detect(rois, tmp, objects, stream);
 
-    // if (rejCriteria != NO_REJECT)
-    flds.suppress(tmp, objects);
+    if (rejCriteria != NO_REJECT)
+    {
+        flds.suppress(tmp, objects);
+        flds.suppressed.copyTo(spr);
+    }
 }
 
 void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const