Backport for dnn input shape estimation
author: Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Mon, 2 Dec 2019 13:25:21 +0000 (16:25 +0300)
committer: Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Mon, 2 Dec 2019 13:28:59 +0000 (16:28 +0300)
modules/dnn/src/caffe/caffe_importer.cpp
modules/dnn/src/dnn.cpp
modules/dnn/test/test_caffe_importer.cpp
modules/dnn/test/test_ie_models.cpp

index 6c4d794..a911f95 100644 (file)
@@ -424,6 +424,36 @@ public:
         }
         dstNet.setInputsNames(netInputs);
 
+        std::vector<MatShape> inp_shapes;
+        if (net.input_shape_size() > 0 || (layersSize > 0 && net.layer(0).has_input_param() &&
+            net.layer(0).input_param().shape_size() > 0)) {
+
+            int size = (net.input_shape_size() > 0) ? net.input_shape_size() :
+                                                      net.layer(0).input_param().shape_size();
+            for (int inp_id = 0; inp_id < size; inp_id++)
+            {
+                const caffe::BlobShape &_input_shape = (net.input_shape_size() > 0) ?
+                                                        net.input_shape(inp_id) :
+                                                        net.layer(0).input_param().shape(inp_id);
+                MatShape shape;
+                for (int i = 0; i < _input_shape.dim_size(); i++) {
+                    shape.push_back((int)_input_shape.dim(i));
+                }
+                inp_shapes.push_back(shape);
+            }
+        }
+        else if (net.input_dim_size() > 0) {
+            MatShape shape;
+            for (int dim = 0; dim < net.input_dim_size(); dim++) {
+                shape.push_back(net.input_dim(dim));
+            }
+            inp_shapes.push_back(shape);
+        }
+
+        for (int inp_id = 0; inp_id < inp_shapes.size(); inp_id++) {
+            dstNet.setInput(Mat(inp_shapes[inp_id], CV_32F), netInputs[inp_id]);
+        }
+
         addedBlobs.clear();
     }
 
index ad2e527..407cbf2 100644 (file)
@@ -2772,6 +2772,18 @@ struct Net::Impl
     {
         std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
 
+        if (inOutShapes[0].in[0].empty() && !layers[0].outputBlobs.empty())
+        {
+            ShapesVec shapes;
+            for (int i = 0; i < layers[0].outputBlobs.size(); i++)
+            {
+                Mat& inp = layers[0].outputBlobs[i];
+                CV_Assert(inp.total());
+                shapes.push_back(shape(inp));
+            }
+            inOutShapes[0].in = shapes;
+         }
+
         if (inOutShapes[id].in.empty())
         {
             for(int i = 0; i < inputLayerIds.size(); i++)
@@ -2934,14 +2946,23 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
 #endif
 
     std::vector<String> inputsNames;
+    std::vector<MatShape> inp_shapes;
     for (auto& it : ieNet.getInputsInfo())
     {
         inputsNames.push_back(it.first);
+        std::vector<size_t> dims = it.second->getTensorDesc().getDims();
+        inp_shapes.push_back(std::vector<int>(dims.begin(), dims.end()));
     }
 
     Net cvNet;
     cvNet.setInputsNames(inputsNames);
 
+    // set empty input to determine input shapes
+    for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id)
+    {
+        cvNet.setInput(Mat(inp_shapes[inp_id], CV_32F), inputsNames[inp_id]);
+    }
+
     Ptr<BackendNode> backendNode;
 #ifdef HAVE_DNN_NGRAPH
     if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam())
index 3ec6f94..22e31db 100644 (file)
@@ -182,6 +182,17 @@ TEST_P(Reproducibility_AlexNet, Accuracy)
         ASSERT_FALSE(net.empty());
     }
 
+    // Test input layer size
+    std::vector<MatShape> inLayerShapes;
+    std::vector<MatShape> outLayerShapes;
+    net.getLayerShapes(MatShape(), 0, inLayerShapes, outLayerShapes);
+    ASSERT_FALSE(inLayerShapes.empty());
+    ASSERT_EQ(inLayerShapes[0].size(), 4);
+    ASSERT_EQ(inLayerShapes[0][0], 1);
+    ASSERT_EQ(inLayerShapes[0][1], 3);
+    ASSERT_EQ(inLayerShapes[0][2], 227);
+    ASSERT_EQ(inLayerShapes[0][3], 227);
+
     const float l1 = 1e-5;
     const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 3e-3 : 1e-4;
 
index 592d87c..8d94543 100644 (file)
@@ -254,20 +254,6 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath
     infRequest.Infer();
 }
 
-std::vector<String> getOutputsNames(const Net& net)
-{
-    std::vector<String> names;
-    if (names.empty())
-    {
-        std::vector<int> outLayers = net.getUnconnectedOutLayers();
-        std::vector<String> layersNames = net.getLayerNames();
-        names.resize(outLayers.size());
-        for (size_t i = 0; i < outLayers.size(); ++i)
-            names[i] = layersNames[outLayers[i] - 1];
-    }
-    return names;
-}
-
 void runCV(Backend backendId, Target targetId, const std::string& xmlPath, const std::string& binPath,
            const std::map<std::string, cv::Mat>& inputsMap,
            std::map<std::string, cv::Mat>& outputsMap)
@@ -279,7 +265,7 @@ void runCV(Backend backendId, Target targetId, const std::string& xmlPath, const
     net.setPreferableBackend(backendId);
     net.setPreferableTarget(targetId);
 
-    std::vector<String> outNames = getOutputsNames(net);
+    std::vector<String> outNames = net.getUnconnectedOutLayersNames();
     std::vector<Mat> outs;
     net.forward(outs, outNames);