Merge pull request #18891 from CowKeyMan:NMS_boxes_with_different_labels
authorDaniel Cauchi <33454325+CowKeyMan@users.noreply.github.com>
Tue, 1 Dec 2020 13:50:24 +0000 (14:50 +0100)
committerGitHub <noreply@github.com>
Tue, 1 Dec 2020 13:50:24 +0000 (13:50 +0000)
Add option for NMS for boxes with different labels

* DetectionModel impl

* Add option for NMS for boxes with different labels

In the detect function declared in modules/dnn/include/opencv2/dnn/dnn.hpp, whose implementation can be found at modules/dnn/src/model.cpp, Non Max Suppression (NMS) is applied only among boxes that share the same label. Thus, a flag
was added to allow developers to choose whether they want to keep the default implementation or whether they would like NMS to be applied to all the boxes, regardless of label.

The flag is called nmsDifferentLabels (it appears as nmsAcrossClasses in the final code of this pull request) and is given a default value of false, which keeps the current default implementation, thus allowing existing projects to update OpenCV without disruption.

Solves issue opencv#18832

* Change return type of set & Add default constr

* Add assertions due to default constructor

modules/dnn/include/opencv2/dnn/dnn.hpp
modules/dnn/src/model.cpp
modules/dnn/test/test_model.cpp

index 69b71f9..5467c98 100644 (file)
@@ -1296,6 +1296,23 @@ CV__DNN_INLINE_NS_BEGIN
           */
          CV_WRAP DetectionModel(const Net& network);
 
+         CV_DEPRECATED_EXTERNAL  // avoid using in C++ code (need to fix bindings first)
+         DetectionModel();  // NOTE(review): setNmsAcrossClasses(), getNmsAcrossClasses() and detect() CV_Assert that a DetectionModel_Impl exists; this constructor does not create one
+
+         /**
+          * @brief Sets nmsAcrossClasses (default false: detect() applies non max suppression per-class;
+          * true: non max suppression is applied to all boxes, regardless of class).
+          * @param[in] value The new value for nmsAcrossClasses
+          * @return Reference to this model.
+          */
+         CV_WRAP DetectionModel& setNmsAcrossClasses(bool value);
+
+         /**
+          * @brief Getter for nmsAcrossClasses (defaults to false, so that detect() applies non max suppression only per-class).
+          * @return The current value of nmsAcrossClasses.
+          */
+         CV_WRAP bool getNmsAcrossClasses();
+
          /** @brief Given the @p input frame, create input blob, run net and return result detections.
           *  @param[in]  frame  The input image.
           *  @param[out] classIds Class indexes in result detection.
index aefeaa4..16f7d31 100644 (file)
@@ -320,34 +320,78 @@ void SegmentationModel::segment(InputArray frame, OutputArray mask)
     }
 }
 
-void disableRegionNMS(Net& net)
+class DetectionModel_Impl : public Model::Impl  // Impl subclass carrying DetectionModel-only state (the nmsAcrossClasses flag)
 {
-    for (String& name : net.getUnconnectedOutLayersNames())
+public:
+    virtual ~DetectionModel_Impl() {}
+    DetectionModel_Impl() : Impl() {}
+    DetectionModel_Impl(const DetectionModel_Impl&) = delete;  // copying disabled
+    DetectionModel_Impl(DetectionModel_Impl&&) = delete;  // moving disabled
+
+    void disableRegionNMS(Net& net)  // sets nmsThreshold to 0 on each unconnected output layer that casts to RegionLayer
     {
-        int layerId = net.getLayerId(name);
-        Ptr<RegionLayer> layer = net.getLayer(layerId).dynamicCast<RegionLayer>();
-        if (!layer.empty())
+        for (String& name : net.getUnconnectedOutLayersNames())
         {
-            layer->nmsThreshold = 0;
+            int layerId = net.getLayerId(name);
+            Ptr<RegionLayer> layer = net.getLayer(layerId).dynamicCast<RegionLayer>();
+            if (!layer.empty())  // only touch outputs for which the RegionLayer cast produced a non-empty Ptr
+            {
+                layer->nmsThreshold = 0;
+            }
         }
     }
-}
+
+    void setNmsAcrossClasses(bool value) {
+        nmsAcrossClasses = value;
+    }
+
+    bool getNmsAcrossClasses() {
+        return nmsAcrossClasses;
+    }
+
+private:
+    bool nmsAcrossClasses = false;  // false: detect() runs NMS per class; true: a single NMS pass over all boxes
+};
 
 DetectionModel::DetectionModel(const String& model, const String& config)
-    : Model(model, config)
+    : DetectionModel(readNet(model, config))  // read the network, then reuse the Net-based constructor
+{
+    // nothing: all setup happens in the delegated constructor
+}
+
+DetectionModel::DetectionModel(const Net& network) : Model()
 {
-    disableRegionNMS(getNetwork_());  // FIXIT Move to DetectionModel::Impl::initNet()
+    impl = makePtr<DetectionModel_Impl>();  // install the detection-specific Impl
+    impl->initNet(network);  // hand the network to the Impl
+    impl.dynamicCast<DetectionModel_Impl>()->disableRegionNMS(getNetwork_());  // FIXIT Move to DetectionModel::Impl::initNet()
+}
+
+DetectionModel::DetectionModel() : Model()
+{
+    // nothing; NOTE(review): no DetectionModel_Impl is created here, which is presumably why the other methods CV_Assert on it - confirm
+}
+
+DetectionModel& DetectionModel::setNmsAcrossClasses(bool value)
+{
+    CV_Assert(impl != nullptr && impl.dynamicCast<DetectionModel_Impl>() != nullptr); // remove once default constructor is removed
+
+    impl.dynamicCast<DetectionModel_Impl>()->setNmsAcrossClasses(value);  // forward to the Impl, which stores the flag
+    return *this;  // returned by reference to this model
 }
 
-DetectionModel::DetectionModel(const Net& network) : Model(network)
+bool DetectionModel::getNmsAcrossClasses()
 {
-    disableRegionNMS(getNetwork_());  // FIXIT Move to DetectionModel::Impl::initNet()
+    CV_Assert(impl != nullptr && impl.dynamicCast<DetectionModel_Impl>() != nullptr); // remove once default constructor is removed
+
+    return impl.dynamicCast<DetectionModel_Impl>()->getNmsAcrossClasses();  // read the flag stored in the Impl
 }
 
 void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
                             CV_OUT std::vector<float>& confidences, CV_OUT std::vector<Rect>& boxes,
                             float confThreshold, float nmsThreshold)
 {
+    CV_Assert(impl != nullptr && impl.dynamicCast<DetectionModel_Impl>() != nullptr); // remove once default constructor is removed
+
     std::vector<Mat> detections;
     impl->processFrame(frame, detections);
 
@@ -413,7 +457,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
     {
         std::vector<int> predClassIds;
         std::vector<Rect> predBoxes;
-        std::vector<float> predConf;
+        std::vector<float> predConfidences;
         for (int i = 0; i < detections.size(); ++i)
         {
             // Network produces output blob with a shape NxC where N is a number of
@@ -442,37 +486,51 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
                 height   = std::max(1, std::min(height, frameHeight - top));
 
                 predClassIds.push_back(classIdPoint.x);
-                predConf.push_back(static_cast<float>(conf));
+                predConfidences.push_back(static_cast<float>(conf));
                 predBoxes.emplace_back(left, top, width, height);
             }
         }
 
         if (nmsThreshold)
         {
-            std::map<int, std::vector<size_t> > class2indices;
-            for (size_t i = 0; i < predClassIds.size(); i++)
+            if (getNmsAcrossClasses())
             {
-                if (predConf[i] >= confThreshold)
+                std::vector<int> indices;
+                NMSBoxes(predBoxes, predConfidences, confThreshold, nmsThreshold, indices);
+                for (int idx : indices)
                 {
-                    class2indices[predClassIds[i]].push_back(i);
+                    boxes.push_back(predBoxes[idx]);
+                    confidences.push_back(predConfidences[idx]);
+                    classIds.push_back(predClassIds[idx]);
                 }
             }
-            for (const auto& it : class2indices)
+            else
             {
-                std::vector<Rect> localBoxes;
-                std::vector<float> localConfidences;
-                for (size_t idx : it.second)
+                std::map<int, std::vector<size_t> > class2indices;
+                for (size_t i = 0; i < predClassIds.size(); i++)
                 {
-                    localBoxes.push_back(predBoxes[idx]);
-                    localConfidences.push_back(predConf[idx]);
+                    if (predConfidences[i] >= confThreshold)
+                    {
+                        class2indices[predClassIds[i]].push_back(i);
+                    }
                 }
-                std::vector<int> indices;
-                NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices);
-                classIds.resize(classIds.size() + indices.size(), it.first);
-                for (int idx : indices)
+                for (const auto& it : class2indices)
                 {
-                    boxes.push_back(localBoxes[idx]);
-                    confidences.push_back(localConfidences[idx]);
+                    std::vector<Rect> localBoxes;
+                    std::vector<float> localConfidences;
+                    for (size_t idx : it.second)
+                    {
+                        localBoxes.push_back(predBoxes[idx]);
+                        localConfidences.push_back(predConfidences[idx]);
+                    }
+                    std::vector<int> indices;
+                    NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices);
+                    classIds.resize(classIds.size() + indices.size(), it.first);
+                    for (int idx : indices)
+                    {
+                        boxes.push_back(localBoxes[idx]);
+                        confidences.push_back(localConfidences[idx]);
+                    }
                 }
             }
         }
@@ -480,7 +538,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
         {
             boxes       = std::move(predBoxes);
             classIds    = std::move(predClassIds);
-            confidences = std::move(predConf);
+            confidences = std::move(predConfidences);
         }
     }
     else
index 7d516de..58a8814 100644 (file)
@@ -25,7 +25,8 @@ public:
                          double scoreDiff, double iouDiff,
                          double confThreshold = 0.24, double nmsThreshold = 0.0,
                          const Size& size = {-1, -1}, Scalar mean = Scalar(),
-                         double scale = 1.0, bool swapRB = false, bool crop = false)
+                         double scale = 1.0, bool swapRB = false, bool crop = false,
+                         bool nmsAcrossClasses = false)
     {
         checkBackend();
 
@@ -38,6 +39,8 @@ public:
         model.setPreferableBackend(backend);
         model.setPreferableTarget(target);
 
+        model.setNmsAcrossClasses(nmsAcrossClasses);
+
         std::vector<int> classIds;
         std::vector<float> confidences;
         std::vector<Rect> boxes;
@@ -177,6 +180,58 @@ TEST_P(Test_Model, DetectRegion)
                     Scalar(), scale, swapRB);
 }
 
+TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses)
+{
+    applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_MEMORY_1GB);
+
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000)  // nGraph compilation failure
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
+
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
+#endif
+
+#if defined(INF_ENGINE_RELEASE)
+    if (target == DNN_TARGET_MYRIAD
+        && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
+#endif
+
+    std::vector<int> refClassIds = { 6, 11 };
+    std::vector<float> refConfidences = { 0.750469f, 0.901615f };
+    std::vector<Rect2d> refBoxes = { Rect2d(240, 53, 135, 72),
+                                    Rect2d(58, 141, 117, 249) };
+
+    std::string img_path = _tf("dog416.png");
+    std::string weights_file = _tf("yolo-voc.weights", false);
+    std::string config_file = _tf("yolo-voc.cfg");
+
+    double scale = 1.0 / 255.0;
+    Size size{ 416, 416 };
+    bool swapRB = true;
+    bool crop = false;
+    bool nmsAcrossClasses = true;
+
+    double confThreshold = 0.24;
+    double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 0.15: 0.15;
+    double scoreDiff = 8e-5, iouDiff = 1e-5;
+    if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16)
+    {
+        scoreDiff = 1e-2;
+        iouDiff = 1.6e-2;
+    }
+
+    testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences,
+        refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size,
+        Scalar(), scale, swapRB, crop,
+        nmsAcrossClasses);
+}
+
 TEST_P(Test_Model, DetectionOutput)
 {
 #if defined(INF_ENGINE_RELEASE)