Set output layers names and types for models in DLDT's intermediate representation
author Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Thu, 28 Jun 2018 06:09:11 +0000 (09:09 +0300)
committer Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Thu, 28 Jun 2018 07:21:45 +0000 (10:21 +0300)
modules/dnn/src/dnn.cpp
modules/dnn/test/test_layers.cpp
samples/dnn/object_detection.cpp
samples/dnn/object_detection.py

index 6a7c9d5..438cde2 100644 (file)
@@ -1993,11 +1993,17 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
     backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
     for (auto& it : ieNet.getOutputsInfo())
     {
+        Ptr<Layer> cvLayer(new InfEngineBackendLayer(it.second));
+        InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
+        CV_Assert(ieLayer);
+
         LayerParams lp;
         int lid = cvNet.addLayer(it.first, "", lp);
 
         LayerData& ld = cvNet.impl->layers[lid];
-        ld.layerInstance = Ptr<Layer>(new InfEngineBackendLayer(it.second));
+        cvLayer->name = it.first;
+        cvLayer->type = ieLayer->type;
+        ld.layerInstance = cvLayer;
         ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode;
 
         for (int i = 0; i < inputsNames.size(); ++i)
index b773c25..720447a 100644 (file)
@@ -925,6 +925,10 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy)
     Mat out = net.forward();
 
     normAssert(outDefault, out);
+
+    std::vector<int> outLayers = net.getUnconnectedOutLayers();
+    ASSERT_EQ(net.getLayer(outLayers[0])->name, "output_merge");
+    ASSERT_EQ(net.getLayer(outLayers[0])->type, "Concat");
 }
 
 // 1. Create a .prototxt file with the following network:
index 084d41b..922bdcc 100644 (file)
@@ -22,6 +22,7 @@ const char* keys =
     "{ height      | -1 | Preprocess input image by resizing to a specific height. }"
     "{ rgb         |    | Indicate that model works with RGB input images instead BGR ones. }"
     "{ thr         | .5 | Confidence threshold. }"
+    "{ nms         | .4 | Non-maximum suppression threshold. }"
     "{ backend     |  0 | Choose one of computation backends: "
                          "0: automatically (by default), "
                          "1: Halide language (http://halide-lang.org/), "
@@ -37,7 +38,7 @@ const char* keys =
 using namespace cv;
 using namespace dnn;
 
-float confThreshold;
+float confThreshold, nmsThreshold;
 std::vector<std::string> classes;
 
 void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net);
@@ -59,6 +60,7 @@ int main(int argc, char** argv)
     }
 
     confThreshold = parser.get<float>("thr");
+    nmsThreshold = parser.get<float>("nms");
     float scale = parser.get<float>("scale");
     Scalar mean = parser.get<Scalar>("mean");
     bool swapRB = parser.get<bool>("rgb");
@@ -144,6 +146,9 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
     static std::vector<int> outLayers = net.getUnconnectedOutLayers();
     static std::string outLayerType = net.getLayer(outLayers[0])->type;
 
+    std::vector<int> classIds;
+    std::vector<float> confidences;
+    std::vector<Rect> boxes;
     if (net.getLayer(0)->outputNameToIndex("im_info") != -1)  // Faster-RCNN or R-FCN
     {
         // Network produces output blob with a shape 1x1xNx7 where N is a number of
@@ -160,8 +165,11 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
                 int top = (int)data[i + 4];
                 int right = (int)data[i + 5];
                 int bottom = (int)data[i + 6];
-                int classId = (int)(data[i + 1]) - 1;  // Skip 0th background class id.
-                drawPred(classId, confidence, left, top, right, bottom, frame);
+                int width = right - left + 1;
+                int height = bottom - top + 1;
+                classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
+                boxes.push_back(Rect(left, top, width, height));
+                confidences.push_back(confidence);
             }
         }
     }
@@ -181,16 +189,16 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
                 int top = (int)(data[i + 4] * frame.rows);
                 int right = (int)(data[i + 5] * frame.cols);
                 int bottom = (int)(data[i + 6] * frame.rows);
-                int classId = (int)(data[i + 1]) - 1;  // Skip 0th background class id.
-                drawPred(classId, confidence, left, top, right, bottom, frame);
+                int width = right - left + 1;
+                int height = bottom - top + 1;
+                classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
+                boxes.push_back(Rect(left, top, width, height));
+                confidences.push_back(confidence);
             }
         }
     }
     else if (outLayerType == "Region")
     {
-        std::vector<int> classIds;
-        std::vector<float> confidences;
-        std::vector<Rect> boxes;
         for (size_t i = 0; i < outs.size(); ++i)
         {
             // Network produces output blob with a shape NxC where N is a number of
@@ -218,18 +226,19 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
                 }
             }
         }
-        std::vector<int> indices;
-        NMSBoxes(boxes, confidences, confThreshold, 0.4f, indices);
-        for (size_t i = 0; i < indices.size(); ++i)
-        {
-            int idx = indices[i];
-            Rect box = boxes[idx];
-            drawPred(classIds[idx], confidences[idx], box.x, box.y,
-                     box.x + box.width, box.y + box.height, frame);
-        }
     }
     else
         CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
+
+    std::vector<int> indices;
+    NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
+    for (size_t i = 0; i < indices.size(); ++i)
+    {
+        int idx = indices[i];
+        Rect box = boxes[idx];
+        drawPred(classIds[idx], confidences[idx], box.x, box.y,
+                 box.x + box.width, box.y + box.height, frame);
+    }
 }
 
 void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
index b191cd4..386e028 100644 (file)
@@ -31,6 +31,7 @@ parser.add_argument('--height', type=int,
 parser.add_argument('--rgb', action='store_true',
                     help='Indicate that model works with RGB input images instead BGR ones.')
 parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
+parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold')
 parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                     help="Choose one of computation backends: "
                          "%d: automatically (by default), "
@@ -57,6 +58,7 @@ net.setPreferableBackend(args.backend)
 net.setPreferableTarget(args.target)
 
 confThreshold = args.thr
+nmsThreshold = args.nms
 
 def getOutputsNames(net):
     layersNames = net.getLayerNames()
@@ -86,36 +88,43 @@ def postprocess(frame, outs):
     lastLayerId = net.getLayerId(layerNames[-1])
     lastLayer = net.getLayer(lastLayerId)
 
+    classIds = []
+    confidences = []
+    boxes = []
     if net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
         # Network produces output blob with a shape 1x1xNx7 where N is a number of
         # detections and an every detection is a vector of values
         # [batchId, classId, confidence, left, top, right, bottom]
-        assert(len(outs) == 1)
-        out = outs[0]
-        for detection in out[0, 0]:
-            confidence = detection[2]
-            if confidence > confThreshold:
-                left = int(detection[3])
-                top = int(detection[4])
-                right = int(detection[5])
-                bottom = int(detection[6])
-                classId = int(detection[1]) - 1  # Skip background label
-                drawPred(classId, confidence, left, top, right, bottom)
+        for out in outs:
+            for detection in out[0, 0]:
+                confidence = detection[2]
+                if confidence > confThreshold:
+                    left = int(detection[3])
+                    top = int(detection[4])
+                    right = int(detection[5])
+                    bottom = int(detection[6])
+                    width = right - left + 1
+                    height = bottom - top + 1
+                    classIds.append(int(detection[1]) - 1)  # Skip background label
+                    confidences.append(float(confidence))
+                    boxes.append([left, top, width, height])
     elif lastLayer.type == 'DetectionOutput':
         # Network produces output blob with a shape 1x1xNx7 where N is a number of
         # detections and an every detection is a vector of values
         # [batchId, classId, confidence, left, top, right, bottom]
-        assert(len(outs) == 1)
-        out = outs[0]
-        for detection in out[0, 0]:
-            confidence = detection[2]
-            if confidence > confThreshold:
-                left = int(detection[3] * frameWidth)
-                top = int(detection[4] * frameHeight)
-                right = int(detection[5] * frameWidth)
-                bottom = int(detection[6] * frameHeight)
-                classId = int(detection[1]) - 1  # Skip background label
-                drawPred(classId, confidence, left, top, right, bottom)
+        for out in outs:
+            for detection in out[0, 0]:
+                confidence = detection[2]
+                if confidence > confThreshold:
+                    left = int(detection[3] * frameWidth)
+                    top = int(detection[4] * frameHeight)
+                    right = int(detection[5] * frameWidth)
+                    bottom = int(detection[6] * frameHeight)
+                    width = right - left + 1
+                    height = bottom - top + 1
+                    classIds.append(int(detection[1]) - 1)  # Skip background label
+                    confidences.append(float(confidence))
+                    boxes.append([left, top, width, height])
     elif lastLayer.type == 'Region':
         # Network produces output blob with a shape NxC where N is a number of
         # detected objects and C is a number of classes + 4 where the first 4
@@ -138,15 +147,19 @@ def postprocess(frame, outs):
                     classIds.append(classId)
                     confidences.append(float(confidence))
                     boxes.append([left, top, width, height])
-        indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, 0.4)
-        for i in indices:
-            i = i[0]
-            box = boxes[i]
-            left = box[0]
-            top = box[1]
-            width = box[2]
-            height = box[3]
-            drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
+    else:
+        print('Unknown output layer type: ' + lastLayer.type)
+        exit()
+
+    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
+    for i in indices:
+        i = i[0]
+        box = boxes[i]
+        left = box[0]
+        top = box[1]
+        width = box[2]
+        height = box[3]
+        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
 
 # Process inputs
 winName = 'Deep learning object detection in OpenCV'