Let switch CPU/OpenCL targets for models from Intel's Model Optimizer
author Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Wed, 18 Apr 2018 14:26:54 +0000 (17:26 +0300)
committer Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Thu, 19 Apr 2018 07:23:57 +0000 (10:23 +0300)
modules/dnn/src/op_inf_engine.cpp
modules/dnn/test/test_backends.cpp
modules/dnn/test/test_caffe_importer.cpp
modules/dnn/test/test_common.hpp
modules/dnn/test/test_darknet_importer.cpp
modules/dnn/test/test_tf_importer.cpp

index 129ed94..6202701 100644 (file)
@@ -139,7 +139,6 @@ InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net)
     inputs = net.getInputsInfo();
     outputs = net.getOutputsInfo();
     layers.resize(net.layerCount());  // A hack to execute InfEngineBackendNet::layerCount correctly.
-    initPlugin(net);
 }
 
 void InfEngineBackendNet::Release() noexcept
index ea79119..5d6f427 100644 (file)
@@ -71,7 +71,7 @@ public:
         Mat out = net.forward(outputLayer).clone();
 
         if (outputLayer == "detection_out")
-            checkDetections(outDefault, out, "First run", l1, lInf);
+            normAssertDetections(outDefault, out, "First run", 0.2);
         else
             normAssert(outDefault, out, "First run", l1, lInf);
 
index 38c3c7e..82b395a 100644 (file)
@@ -167,7 +167,7 @@ TEST(Reproducibility_SSD, Accuracy)
     Mat out = net.forward("detection_out");
 
     Mat ref = blobFromNPY(_tf("ssd_out.npy"));
-    normAssert(ref, out);
+    normAssertDetections(ref, out);
 }
 
 typedef testing::TestWithParam<DNNTarget> Reproducibility_MobileNet_SSD;
@@ -186,7 +186,7 @@ TEST_P(Reproducibility_MobileNet_SSD, Accuracy)
     Mat out = net.forward();
 
     Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy"));
-    normAssert(ref, out);
+    normAssertDetections(ref, out);
 
     // Check that detections aren't preserved.
     inp.setTo(0.0f);
@@ -403,14 +403,13 @@ TEST_P(opencv_face_detector, Accuracy)
     // Output has shape 1x1xNx7 where N - number of detections.
     // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
     Mat out = net.forward();
-
-    Mat ref = (Mat_<float>(6, 5) << 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
-                                    0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
-                                    0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
-                                    0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
-                                    0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
-                                    0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
-    normAssert(out.reshape(1, out.total() / 7).rowRange(0, 6).colRange(2, 7), ref);
+    Mat ref = (Mat_<float>(6, 7) << 0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
+                                    0, 1, 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
+                                    0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
+                                    0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
+                                    0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
+                                    0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
+    normAssertDetections(ref, out, "", 0.5, 1e-5, 2e-4);
 }
 INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector,
     Combine(
@@ -426,14 +425,14 @@ TEST(Test_Caffe, FasterRCNN_and_RFCN)
                             "resnet50_rfcn_final.caffemodel"};
     std::string protos[] = {"faster_rcnn_vgg16.prototxt", "faster_rcnn_zf.prototxt",
                             "rfcn_pascal_voc_resnet50.prototxt"};
-    Mat refs[] = {(Mat_<float>(3, 6) << 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
-                                        7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
-                                        12, 0.993028, 133.221, 189.377, 350.994, 563.166),
-                  (Mat_<float>(3, 6) << 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
-                                        7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
-                                        12, 0.967198, 138.588, 206.843, 329.766, 553.176),
-                  (Mat_<float>(2, 6) << 7, 0.991359, 491.822, 81.1668, 702.573, 178.234,
-                                        12, 0.94786, 132.093, 223.903, 338.077, 566.16)};
+    Mat refs[] = {(Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
+                                        0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
+                                        0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166),
+                  (Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
+                                        0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
+                                        0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176),
+                  (Mat_<float>(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234,
+                                        0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16)};
     for (int i = 0; i < 3; ++i)
     {
         std::string proto = findDataFile("dnn/" + protos[i], false);
@@ -450,15 +449,7 @@ TEST(Test_Caffe, FasterRCNN_and_RFCN)
         // Output has shape 1x1xNx7 where N - number of detections.
         // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
         Mat out = net.forward();
-        out = out.reshape(1, out.total() / 7);
-
-        Mat detections;
-        for (int j = 0; j < out.rows; ++j)
-        {
-            if (out.at<float>(j, 2) > 0.8)
-              detections.push_back(out.row(j).colRange(1, 7));
-        }
-        normAssert(detections, refs[i], ("model name: " + models[i]).c_str(), 1e-3, 1e-3);
+        normAssertDetections(refs[i], out, ("model name: " + models[i]).c_str(), 0.8);
     }
 }
 
index a6da72c..872d19d 100644 (file)
@@ -57,6 +57,96 @@ inline void normAssert(cv::InputArray ref, cv::InputArray test, const char *comm
     EXPECT_LE(normInf, lInf) << comment;
 }
 
+// Converts a 2-D single-channel float matrix with 4 columns, each row holding
+// [left, top, right, bottom], into a vector with one box per row stored as
+// (x, y, width, height).
+//
+// The layout is validated with CV_Assert rather than gtest's EXPECT_* macros:
+// EXPECT_* failures are non-fatal, so after a failed check the loop below would
+// still read four floats per row through a raw pointer.
+static std::vector<cv::Rect2d> matToBoxes(const cv::Mat& m)
+{
+    CV_Assert(m.type() == CV_32FC1);
+    CV_Assert(m.dims == 2);
+    CV_Assert(m.cols == 4);
+
+    std::vector<cv::Rect2d> boxes(m.rows);
+    for (int i = 0; i < m.rows; ++i)
+    {
+        CV_Assert(m.row(i).isContinuous());
+        const float* data = m.ptr<float>(i);
+        // Corner coordinates are widened to double before computing the size.
+        double l = data[0], t = data[1], r = data[2], b = data[3];
+        boxes[i] = cv::Rect2d(l, t, r - l, b - t);
+    }
+    return boxes;
+}
+
+// Compares two sets of detections, each given as three parallel arrays
+// (class ids, scores, boxes) where index i describes the i-th detection.
+//
+// Every test detection whose score is not below confThreshold must match a
+// not-yet-matched reference detection: equal class id, score difference below
+// scores_diff and an IoU that differs from 1 by less than boxes_iou_diff.
+// Afterwards every reference detection with a score above confThreshold must
+// have been matched. Mismatches are printed to stdout and reported as gtest
+// failures tagged with comment.
+//
+// refClassIds/refScores/refBoxes    - reference detections.
+// testClassIds/testScores/testBoxes - detections under test.
+// comment        - text appended to every failure message.
+// confThreshold  - detections scored below it are ignored.
+// scores_diff    - maximal allowed absolute difference between scores.
+// boxes_iou_diff - maximal allowed deviation of the boxes' IoU from 1.
+inline void normAssertDetections(const std::vector<int>& refClassIds,
+                                 const std::vector<float>& refScores,
+                                 const std::vector<cv::Rect2d>& refBoxes,
+                                 const std::vector<int>& testClassIds,
+                                 const std::vector<float>& testScores,
+                                 const std::vector<cv::Rect2d>& testBoxes,
+                                 const char *comment = "", double confThreshold = 0.0,
+                                 double scores_diff = 1e-5, double boxes_iou_diff = 1e-4)
+{
+    // The arrays of each set are indexed in lockstep below, so a length mismatch
+    // would read past the end of the shorter one. These checks are fatal for
+    // this function only: on failure it returns before any element is accessed.
+    ASSERT_EQ(refClassIds.size(), refBoxes.size()) << comment;
+    ASSERT_EQ(refScores.size(), refBoxes.size()) << comment;
+    ASSERT_EQ(testClassIds.size(), testBoxes.size()) << comment;
+    ASSERT_EQ(testScores.size(), testBoxes.size()) << comment;
+
+    // Each reference detection may be consumed by at most one test detection.
+    std::vector<bool> matchedRefBoxes(refBoxes.size(), false);
+    for (size_t i = 0; i < testBoxes.size(); ++i)
+    {
+        double testScore = testScores[i];
+        if (testScore < confThreshold)
+            continue;
+
+        int testClassId = testClassIds[i];
+        const cv::Rect2d& testBox = testBoxes[i];
+        bool matched = false;
+        for (size_t j = 0; j < refBoxes.size() && !matched; ++j)
+        {
+            if (!matchedRefBoxes[j] && testClassId == refClassIds[j] &&
+                std::abs(testScore - refScores[j]) < scores_diff)
+            {
+                // IoU = intersection / union, union = areaA + areaB - intersection.
+                double interArea = (testBox & refBoxes[j]).area();
+                double iou = interArea / (testBox.area() + refBoxes[j].area() - interArea);
+                if (std::abs(iou - 1.0) < boxes_iou_diff)
+                {
+                    matched = true;
+                    matchedRefBoxes[j] = true;
+                }
+            }
+        }
+        if (!matched)
+            std::cout << cv::format("Unmatched prediction: class %d score %f box ",
+                                    testClassId, testScore) << testBox << std::endl;
+        EXPECT_TRUE(matched) << comment;
+    }
+
+    // Check unmatched reference detections.
+    for (size_t i = 0; i < refBoxes.size(); ++i)
+    {
+        if (!matchedRefBoxes[i] && refScores[i] > confThreshold)
+        {
+            std::cout << cv::format("Unmatched reference: class %d score %f box ",
+                                    refClassIds[i], refScores[i]) << refBoxes[i] << std::endl;
+            // Always fails inside this branch; it is used to report the
+            // offending score next to the threshold in the failure message.
+            EXPECT_LE(refScores[i], confThreshold) << comment;
+        }
+    }
+}
+
+// Overload for SSD-based object detection networks whose output has shape
+// 1x1xNx7, where N is the number of detections and each detection is a vector
+// [batchId, classId, confidence, left, top, right, bottom].
+// Both blobs are flattened to N rows of 7 values and split into class ids,
+// scores and boxes, which are then passed to the overload taking vectors.
+inline void normAssertDetections(cv::Mat ref, cv::Mat out, const char *comment = "",
+                                 double confThreshold = 0.0, double scores_diff = 1e-5,
+                                 double boxes_iou_diff = 1e-4)
+{
+    // Columns of a flattened detection row.
+    const int classIdCol = 1, scoreCol = 2, boxCol = 3, boxEndCol = 7;
+
+    CV_Assert(ref.total() % 7 == 0);
+    CV_Assert(out.total() % 7 == 0);
+
+    // One row per detection, single channel.
+    const int numRef = static_cast<int>(ref.total() / 7);
+    const int numOut = static_cast<int>(out.total() / 7);
+    ref = ref.reshape(1, numRef);
+    out = out.reshape(1, numOut);
+
+    // Class ids are stored as floats in the blob; convert them to integers.
+    cv::Mat refIds, outIds;
+    ref.col(classIdCol).convertTo(refIds, CV_32SC1);
+    out.col(classIdCol).convertTo(outIds, CV_32SC1);
+
+    std::vector<float> refConf(ref.col(scoreCol));
+    std::vector<float> outConf(out.col(scoreCol));
+    std::vector<cv::Rect2d> refRects = matToBoxes(ref.colRange(boxCol, boxEndCol));
+    std::vector<cv::Rect2d> outRects = matToBoxes(out.colRange(boxCol, boxEndCol));
+
+    normAssertDetections(refIds, refConf, refRects, outIds, outConf, outRects,
+                         comment, confThreshold, scores_diff, boxes_iou_diff);
+}
+
 inline bool readFileInMemory(const std::string& filename, std::string& content)
 {
     std::ios::openmode mode = std::ios::in | std::ios::binary;
index ebe8d93..a7679da 100644 (file)
@@ -70,7 +70,7 @@ static void testDarknetModel(const std::string& cfg, const std::string& weights,
                              const std::vector<cv::String>& outNames,
                              const std::vector<int>& refClassIds,
                              const std::vector<float>& refConfidences,
-                             const std::vector<Rect2f>& refBoxes,
+                             const std::vector<Rect2d>& refBoxes,
                              int targetId, float confThreshold = 0.24)
 {
     Mat sample = imread(_tf("dog416.png"));
@@ -85,7 +85,7 @@ static void testDarknetModel(const std::string& cfg, const std::string& weights,
 
     std::vector<int> classIds;
     std::vector<float> confidences;
-    std::vector<Rect2f> boxes;
+    std::vector<Rect2d> boxes;
     for (int i = 0; i < outs.size(); ++i)
     {
         Mat& out = outs[i];
@@ -95,31 +95,20 @@ static void testDarknetModel(const std::string& cfg, const std::string& weights,
             double confidence;
             Point maxLoc;
             minMaxLoc(scores, 0, &confidence, 0, &maxLoc);
-            if (confidence > confThreshold)
-            {
-                float* detection = out.ptr<float>(j);
-                float centerX = detection[0];
-                float centerY = detection[1];
-                float width = detection[2];
-                float height = detection[3];
-                boxes.push_back(Rect2f(centerX - 0.5 * width, centerY - 0.5 * height,
-                                       width, height));
-                confidences.push_back(confidence);
-                classIds.push_back(maxLoc.x);
-            }
-        }
-    }
 
-    ASSERT_EQ(classIds.size(), refClassIds.size());
-    ASSERT_EQ(confidences.size(), refConfidences.size());
-    ASSERT_EQ(boxes.size(), refBoxes.size());
-    for (int i = 0; i < boxes.size(); ++i)
-    {
-        ASSERT_EQ(classIds[i], refClassIds[i]);
-        ASSERT_LE(std::abs(confidences[i] - refConfidences[i]), 1e-4);
-        float iou = (boxes[i] & refBoxes[i]).area() / (boxes[i] | refBoxes[i]).area();
-        ASSERT_LE(std::abs(iou - 1.0f), 1e-4);
+            float* detection = out.ptr<float>(j);
+            double centerX = detection[0];
+            double centerY = detection[1];
+            double width = detection[2];
+            double height = detection[3];
+            boxes.push_back(Rect2d(centerX - 0.5 * width, centerY - 0.5 * height,
+                                   width, height));
+            confidences.push_back(confidence);
+            classIds.push_back(maxLoc.x);
+        }
     }
+    normAssertDetections(refClassIds, refConfidences, refBoxes, classIds,
+                         confidences, boxes, "", confThreshold, 8e-5, 3e-5);
 }
 
 typedef testing::TestWithParam<DNNTarget> Test_Darknet_nets;
@@ -131,10 +120,10 @@ TEST_P(Test_Darknet_nets, YoloVoc)
 
     std::vector<int> classIds(3);
     std::vector<float> confidences(3);
-    std::vector<Rect2f> boxes(3);
-    classIds[0] = 6;  confidences[0] = 0.750469f; boxes[0] = Rect2f(0.577374, 0.127391, 0.325575, 0.173418);  // a car
-    classIds[1] = 1;  confidences[1] = 0.780879f; boxes[1] = Rect2f(0.270762, 0.264102, 0.461713, 0.48131); // a bycicle
-    classIds[2] = 11; confidences[2] = 0.901615f; boxes[2] = Rect2f(0.1386, 0.338509, 0.282737, 0.60028);  // a dog
+    std::vector<Rect2d> boxes(3);
+    classIds[0] = 6;  confidences[0] = 0.750469f; boxes[0] = Rect2d(0.577374, 0.127391, 0.325575, 0.173418);  // a car
+    classIds[1] = 1;  confidences[1] = 0.780879f; boxes[1] = Rect2d(0.270762, 0.264102, 0.461713, 0.48131); // a bycicle
+    classIds[2] = 11; confidences[2] = 0.901615f; boxes[2] = Rect2d(0.1386, 0.338509, 0.282737, 0.60028);  // a dog
     testDarknetModel("yolo-voc.cfg", "yolo-voc.weights", outNames,
                      classIds, confidences, boxes, targetId);
 }
@@ -145,9 +134,9 @@ TEST_P(Test_Darknet_nets, TinyYoloVoc)
     std::vector<cv::String> outNames(1, "detection_out");
     std::vector<int> classIds(2);
     std::vector<float> confidences(2);
-    std::vector<Rect2f> boxes(2);
-    classIds[0] = 6;  confidences[0] = 0.761967f; boxes[0] = Rect2f(0.579042, 0.159161, 0.31544, 0.160779);  // a car
-    classIds[1] = 11; confidences[1] = 0.780595f; boxes[1] = Rect2f(0.129696, 0.386467, 0.315579, 0.534527);  // a dog
+    std::vector<Rect2d> boxes(2);
+    classIds[0] = 6;  confidences[0] = 0.761967f; boxes[0] = Rect2d(0.579042, 0.159161, 0.31544, 0.160779);  // a car
+    classIds[1] = 11; confidences[1] = 0.780595f; boxes[1] = Rect2d(0.129696, 0.386467, 0.315579, 0.534527);  // a dog
     testDarknetModel("tiny-yolo-voc.cfg", "tiny-yolo-voc.weights", outNames,
                      classIds, confidences, boxes, targetId);
 }
@@ -162,10 +151,10 @@ TEST_P(Test_Darknet_nets, YOLOv3)
 
     std::vector<int> classIds(3);
     std::vector<float> confidences(3);
-    std::vector<Rect2f> boxes(3);
-    classIds[0] = 7;  confidences[0] = 0.952983f; boxes[0] = Rect2f(0.614622, 0.150257, 0.286747, 0.138994);  // a truck
-    classIds[1] = 1; confidences[1] = 0.987908f; boxes[1] = Rect2f(0.150913, 0.221933, 0.591342, 0.524327);  // a bycicle
-    classIds[2] = 16; confidences[2] = 0.998836f; boxes[2] = Rect2f(0.160024, 0.389964, 0.257861, 0.553752);  // a dog (COCO)
+    std::vector<Rect2d> boxes(3);
+    classIds[0] = 7;  confidences[0] = 0.952983f; boxes[0] = Rect2d(0.614622, 0.150257, 0.286747, 0.138994);  // a truck
+    classIds[1] = 1; confidences[1] = 0.987908f; boxes[1] = Rect2d(0.150913, 0.221933, 0.591342, 0.524327);  // a bycicle
+    classIds[2] = 16; confidences[2] = 0.998836f; boxes[2] = Rect2d(0.160024, 0.389964, 0.257861, 0.553752);  // a dog (COCO)
     testDarknetModel("yolov3.cfg", "yolov3.weights", outNames,
                      classIds, confidences, boxes, targetId);
 }
index 8d4f4b6..43e30eb 100644 (file)
@@ -237,7 +237,7 @@ TEST_P(Test_TensorFlow_nets, MobileNet_SSD)
 
     normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1), "", 1e-5, 1.5e-4);
     normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 3e-4);
-    normAssert(target[2].reshape(1, 1), output[2].reshape(1, 1), "", 4e-5, 1e-2);
+    normAssertDetections(target[2], output[2], "", 0.2);
 }
 
 TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
@@ -255,21 +255,12 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
     // Output has shape 1x1xNx7 where N - number of detections.
     // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
     Mat out = net.forward();
-    out = out.reshape(1, out.total() / 7);
-
-    Mat detections;
-    for (int i = 0; i < out.rows; ++i)
-    {
-        if (out.at<float>(i, 2) > 0.5)
-          detections.push_back(out.row(i).colRange(1, 7));
-    }
-
-    Mat ref = (Mat_<float>(5, 6) << 1, 0.90176028, 0.19872092, 0.36311883, 0.26461923, 0.63498729,
-                                    3, 0.93569964, 0.64865261, 0.45906419, 0.80675775, 0.65708131,
-                                    3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015,
-                                    10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
-                                    10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
-    normAssert(detections, ref);
+    Mat ref = (Mat_<float>(5, 7) << 0, 1, 0.90176028, 0.19872092, 0.36311883, 0.26461923, 0.63498729,
+                                    0, 3, 0.93569964, 0.64865261, 0.45906419, 0.80675775, 0.65708131,
+                                    0, 3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015,
+                                    0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
+                                    0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
+    normAssertDetections(ref, out, "", 0.5);
 }
 
 TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
@@ -289,13 +280,13 @@ TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
     Mat out = net.forward();
 
     // References are from test for Caffe model.
-    Mat ref = (Mat_<float>(6, 5) << 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
-                                    0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
-                                    0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
-                                    0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
-                                    0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
-                                    0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
-    normAssert(out.reshape(1, out.total() / 7).rowRange(0, 6).colRange(2, 7), ref, "", 2.8e-4, 3.4e-3);
+    Mat ref = (Mat_<float>(6, 7) << 0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
+                                    0, 1, 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
+                                    0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
+                                    0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
+                                    0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
+                                    0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
+    normAssertDetections(ref, out, "", 0.9, 3.4e-3, 1e-2);
 }
 
 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, availableDnnTargets());