Merge pull request #20957 from sturkmen72:update-documentation
author Suleyman TURKMEN <sturkmen@hotmail.com>
Sun, 28 Nov 2021 12:56:28 +0000 (15:56 +0300)
committer GitHub <noreply@github.com>
Sun, 28 Nov 2021 12:56:28 +0000 (12:56 +0000)
Update documentation

* Update DNN-based Face Detection And Recognition tutorial

* samples(dnn/face): update face_detect.cpp

* final changes

Co-authored-by: Alexander Alekhin <alexander.a.alekhin@gmail.com>
doc/tutorials/dnn/dnn_face/dnn_face.markdown
samples/dnn/face_detect.cpp
samples/dnn/face_detect.py
samples/dnn/face_match.cpp [deleted file]
samples/dnn/face_match.py [deleted file]

diff --git a/doc/tutorials/dnn/dnn_face/dnn_face.markdown b/doc/tutorials/dnn/dnn_face/dnn_face.markdown
index 202be3e..f55cdb7 100644 (file)
@@ -36,14 +36,34 @@ There are two models (ONNX format) pre-trained and required for this module:
 
 ### DNNFaceDetector
 
-```cpp
-// Initialize FaceDetectorYN
-Ptr<FaceDetectorYN> faceDetector = FaceDetectorYN::create(onnx_path, "", image.size(), score_thresh, nms_thresh, top_k);
+@add_toggle_cpp
+-   **Downloadable code**: Click
+    [here](https://github.com/opencv/opencv/tree/master/samples/dnn/face_detect.cpp)
 
-// Forward
-Mat faces;
-faceDetector->detect(image, faces);
-```
+-   **Code at a glance:**
+    @include samples/dnn/face_detect.cpp
+@end_toggle
+
+@add_toggle_python
+-   **Downloadable code**: Click
+    [here](https://github.com/opencv/opencv/tree/master/samples/dnn/face_detect.py)
+
+-   **Code at a glance:**
+    @include samples/dnn/face_detect.py
+@end_toggle
+
+Explanation
+-----------
+
+@add_toggle_cpp
+@snippet dnn/face_detect.cpp initialize_FaceDetectorYN
+@snippet dnn/face_detect.cpp inference
+@end_toggle
+
+@add_toggle_python
+@snippet dnn/face_detect.py initialize_FaceDetectorYN
+@snippet dnn/face_detect.py inference
+@end_toggle
 
 The detection output `faces` is a two-dimensional array of type CV_32F, whose rows are the detected face instances and whose columns hold the location of a face and its 5 facial landmarks. The format of each row is as follows:
 
@@ -57,28 +77,25 @@ x1, y1, w, h, x_re, y_re, x_le, y_le, x_nt, y_nt, x_rcm, y_rcm, x_lcm, y_lcm
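As an illustration of this row layout, a detection row from the Python API can be unpacked as follows (a sketch; the helper function is ours, not part of the sample, and the detection score sits in the last column, index 14):

```python
import numpy as np

def unpack_face_row(row):
    # row: one length-15 float32 row of faces[1] returned by detector.detect()
    box = row[0:4].astype(np.int32)      # x1, y1, w, h of the bounding box
    landmarks = row[4:14].reshape(5, 2)  # right eye, left eye, nose tip,
                                         # right and left mouth corners
    score = float(row[14])               # detection score
    return box, landmarks, score
```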
 
 Following face detection, run the code below to extract face features from a facial image.
 
-```cpp
-// Initialize FaceRecognizerSF with model path (cv::String)
-Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(model_path, "");
-
-// Aligning and cropping facial image through the first face of faces detected by dnn_face::DNNFaceDetector
-Mat aligned_face;
-faceRecognizer->alignCrop(image, faces.row(0), aligned_face);
+@add_toggle_cpp
+@snippet dnn/face_detect.cpp initialize_FaceRecognizerSF
+@snippet dnn/face_detect.cpp facerecognizer
+@end_toggle
 
-// Run feature extraction with given aligned_face (cv::Mat)
-Mat feature;
-faceRecognizer->feature(aligned_face, feature);
-feature = feature.clone();
-```
+@add_toggle_python
+@snippet dnn/face_detect.py initialize_FaceRecognizerSF
+@snippet dnn/face_detect.py facerecognizer
+@end_toggle
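Since the `@snippet` directives only expand in the rendered tutorial, here is a minimal Python sketch of this step (the model filename is illustrative; `img1` and `faces1` come from the detection step above):

```python
import cv2 as cv

# Assumes img1 was read with cv.imread() and faces1 = detector.detect(img1).
recognizer = cv.FaceRecognizerSF.create('face_recognizer_fast.onnx', '')

# Align and crop the first detected face, then extract its feature vector.
aligned = recognizer.alignCrop(img1, faces1[1][0])
feature = recognizer.feature(aligned)
```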
 
 After obtaining face features *feature1* and *feature2* of two facial images, run the code below to calculate the identity discrepancy between the two faces.
 
-```cpp
-// Calculating the discrepancy between two face features by using cosine distance.
-double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizer::DisType::COSINE);
-// Calculating the discrepancy between two face features by using normL2 distance.
-double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizer::DisType::NORM_L2);
-```
+@add_toggle_cpp
+@snippet dnn/face_detect.cpp match
+@end_toggle
+
+@add_toggle_python
+@snippet dnn/face_detect.py match
+@end_toggle
 
 For example, two faces have the same identity if the cosine similarity is greater than or equal to 0.363, or the normL2 distance is less than or equal to 1.128.
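A minimal Python sketch of that comparison, using the same thresholds (the sample reports the two measures separately rather than combining them):

```python
# Assumes feature1 and feature2 come from recognizer.feature() as above.
cosine_score = recognizer.match(feature1, feature2, cv.FaceRecognizerSF_FR_COSINE)
l2_score = recognizer.match(feature1, feature2, cv.FaceRecognizerSF_FR_NORM_L2)

same_by_cosine = cosine_score >= 0.363  # higher value means higher similarity (max 1.0)
same_by_l2 = l2_score <= 1.128          # lower value means higher similarity (min 0.0)
```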
 
diff --git a/samples/dnn/face_detect.cpp b/samples/dnn/face_detect.cpp
index 8d91a10..161940c 100644 (file)
 using namespace cv;
 using namespace std;
 
-static Mat visualize(Mat input, Mat faces, int thickness=2)
+static
+void visualize(Mat& input, int frame, Mat& faces, double fps, int thickness = 2)
 {
-    Mat output = input.clone();
+    std::string fpsString = cv::format("FPS : %.2f", (float)fps);
+    if (frame >= 0)
+        cout << "Frame " << frame << ", ";
+    cout << "FPS: " << fpsString << endl;
     for (int i = 0; i < faces.rows; i++)
     {
         // Print results
         cout << "Face " << i
              << ", top-left coordinates: (" << faces.at<float>(i, 0) << ", " << faces.at<float>(i, 1) << "), "
              << "box width: " << faces.at<float>(i, 2)  << ", box height: " << faces.at<float>(i, 3) << ", "
-             << "score: " << faces.at<float>(i, 14) << "\n";
+             << "score: " << cv::format("%.2f", faces.at<float>(i, 14))
+             << endl;
 
         // Draw bounding box
-        rectangle(output, Rect2i(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)), int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3))), Scalar(0, 255, 0), thickness);
+        rectangle(input, Rect2i(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)), int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3))), Scalar(0, 255, 0), thickness);
         // Draw landmarks
-        circle(output, Point2i(int(faces.at<float>(i, 4)),  int(faces.at<float>(i, 5))),  2, Scalar(255,   0,   0), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 6)),  int(faces.at<float>(i, 7))),  2, Scalar(  0,   0, 255), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 8)),  int(faces.at<float>(i, 9))),  2, Scalar(  0, 255,   0), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 10)), int(faces.at<float>(i, 11))), 2, Scalar(255,   0, 255), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 12)), int(faces.at<float>(i, 13))), 2, Scalar(  0, 255, 255), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 4)), int(faces.at<float>(i, 5))), 2, Scalar(255, 0, 0), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 6)), int(faces.at<float>(i, 7))), 2, Scalar(0, 0, 255), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 8)), int(faces.at<float>(i, 9))), 2, Scalar(0, 255, 0), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 10)), int(faces.at<float>(i, 11))), 2, Scalar(255, 0, 255), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 12)), int(faces.at<float>(i, 13))), 2, Scalar(0, 255, 255), thickness);
     }
-    return output;
+    putText(input, fpsString, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
 }
 
-int main(int argc, char ** argv)
+int main(int argc, char** argv)
 {
     CommandLineParser parser(argc, argv,
-        "{help  h           |            | Print this message.}"
-        "{input i           |            | Path to the input image. Omit for detecting on default camera.}"
-        "{model m           | yunet.onnx | Path to the model. Download yunet.onnx in https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.}"
-        "{score_threshold   | 0.9        | Filter out faces of score < score_threshold.}"
-        "{nms_threshold     | 0.3        | Suppress bounding boxes of iou >= nms_threshold.}"
-        "{top_k             | 5000       | Keep top_k bounding boxes before NMS.}"
-        "{save  s           | false      | Set true to save results. This flag is invalid when using camera.}"
-        "{vis   v           | true       | Set true to open a window for result visualization. This flag is invalid when using camera.}"
+        "{help  h           |            | Print this message}"
+        "{image1 i1         |            | Path to the input image1. Omit for detecting through VideoCapture}"
+        "{image2 i2         |            | Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm}"
+        "{video v           | 0          | Path to the input video}"
+        "{scale sc          | 1.0        | Scale factor used to resize input video frames}"
+        "{fd_model fd       | yunet.onnx | Path to the model. Download yunet.onnx in https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx }"
+        "{fr_model fr       | face_recognizer_fast.onnx | Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view}"
+        "{score_threshold   | 0.9        | Filter out faces of score < score_threshold}"
+        "{nms_threshold     | 0.3        | Suppress bounding boxes of iou >= nms_threshold}"
+        "{top_k             | 5000       | Keep top_k bounding boxes before NMS}"
+        "{save s            | false      | Set true to save results. This flag is invalid when using camera}"
     );
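+    // Example invocation (binary name and file paths are illustrative):
+    //   ./face_detect -i1=image1.jpg -i2=image2.jpg -fd=yunet.onnx -fr=face_recognizer_fast.onnx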
-    if (argc == 1 || parser.has("help"))
+    if (parser.has("help"))
     {
         parser.printMessage();
-        return -1;
+        return 0;
     }
 
-    String modelPath = parser.get<String>("model");
+    String fd_modelPath = parser.get<String>("fd_model");
+    String fr_modelPath = parser.get<String>("fr_model");
 
     float scoreThreshold = parser.get<float>("score_threshold");
     float nmsThreshold = parser.get<float>("nms_threshold");
     int topK = parser.get<int>("top_k");
 
     bool save = parser.get<bool>("save");
-    bool vis = parser.get<bool>("vis");
 
+    double cosine_similar_thresh = 0.363;
+    double l2norm_similar_thresh = 1.128;
+
+    //! [initialize_FaceDetectorYN]
     // Initialize FaceDetectorYN
-    Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
+    Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
+    //! [initialize_FaceDetectorYN]
+
+    TickMeter tm;
 
     // If input is an image
-    if (parser.has("input"))
+    if (parser.has("image1"))
     {
-        String input = parser.get<String>("input");
-        Mat image = imread(input);
+        String input1 = parser.get<String>("image1");
+        Mat image1 = imread(samples::findFile(input1));
+        if (image1.empty())
+        {
+            std::cerr << "Cannot read image: " << input1 << std::endl;
+            return 2;
+        }
 
+        tm.start();
+
+        //! [inference]
         // Set input size before inference
-        detector->setInputSize(image.size());
+        detector->setInputSize(image1.size());
 
-        // Inference
-        Mat faces;
-        detector->detect(image, faces);
+        Mat faces1;
+        detector->detect(image1, faces1);
+        if (faces1.rows < 1)
+        {
+            std::cerr << "Cannot find a face in " << input1 << std::endl;
+            return 1;
+        }
+        //! [inference]
 
+        tm.stop();
         // Draw results on the input image
-        Mat result = visualize(image, faces);
+        visualize(image1, -1, faces1, tm.getFPS());
 
         // Save results if save is true
-        if(save)
+        if (save)
         {
-            cout << "Results saved to result.jpg\n";
-            imwrite("result.jpg", result);
+            cout << "Saving result.jpg...\n";
+            imwrite("result.jpg", image1);
         }
 
         // Visualize results
-        if (vis)
+        imshow("image1", image1);
+        pollKey();  // handle UI events to show content
+
+        if (parser.has("image2"))
         {
-            namedWindow(input, WINDOW_AUTOSIZE);
-            imshow(input, result);
-            waitKey(0);
+            String input2 = parser.get<String>("image2");
+            Mat image2 = imread(samples::findFile(input2));
+            if (image2.empty())
+            {
+                std::cerr << "Cannot read image2: " << input2 << std::endl;
+                return 2;
+            }
+
+            tm.reset();
+            tm.start();
+            detector->setInputSize(image2.size());
+
+            Mat faces2;
+            detector->detect(image2, faces2);
+            if (faces2.rows < 1)
+            {
+                std::cerr << "Cannot find a face in " << input2 << std::endl;
+                return 1;
+            }
+            tm.stop();
+            visualize(image2, -1, faces2, tm.getFPS());
+            if (save)
+            {
+                cout << "Saving result2.jpg...\n";
+                imwrite("result2.jpg", image2);
+            }
+            imshow("image2", image2);
+            pollKey();
+
+            //! [initialize_FaceRecognizerSF]
+            // Initialize FaceRecognizerSF
+            Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(fr_modelPath, "");
+            //! [initialize_FaceRecognizerSF]
+
+
+            //! [facerecognizer]
+            // Aligning and cropping facial image through the first face of faces detected.
+            Mat aligned_face1, aligned_face2;
+            faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);
+            faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);
+
+            // Run feature extraction with given aligned_face
+            Mat feature1, feature2;
+            faceRecognizer->feature(aligned_face1, feature1);
+            feature1 = feature1.clone();
+            faceRecognizer->feature(aligned_face2, feature2);
+            feature2 = feature2.clone();
+            //! [facerecognizer]
+
+            //! [match]
+            double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
+            double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
+            //! [match]
+
+            if (cos_score >= cosine_similar_thresh)
+            {
+                std::cout << "They have the same identity;";
+            }
+            else
+            {
+                std::cout << "They have different identities;";
+            }
+            std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";
+
+            if (L2_score <= l2norm_similar_thresh)
+            {
+                std::cout << "They have the same identity;";
+            }
+            else
+            {
+                std::cout << "They have different identities.";
+            }
+            std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
         }
+        cout << "Press any key to exit..." << endl;
+        waitKey(0);
     }
     else
     {
-        int deviceId = 0;
-        VideoCapture cap;
-        cap.open(deviceId, CAP_ANY);
-        int frameWidth = int(cap.get(CAP_PROP_FRAME_WIDTH));
-        int frameHeight = int(cap.get(CAP_PROP_FRAME_HEIGHT));
+        int frameWidth, frameHeight;
+        float scale = parser.get<float>("scale");
+        VideoCapture capture;
+        std::string video = parser.get<string>("video");
+        if (video.size() == 1 && isdigit(video[0]))
+            capture.open(parser.get<int>("video"));
+        else
+            capture.open(samples::findFileOrKeep(video));  // keep GStreamer pipelines
+        if (capture.isOpened())
+        {
+            frameWidth = int(capture.get(CAP_PROP_FRAME_WIDTH) * scale);
+            frameHeight = int(capture.get(CAP_PROP_FRAME_HEIGHT) * scale);
+            cout << "Video " << video
+                << ": width=" << frameWidth
+                << ", height=" << frameHeight
+                << endl;
+        }
+        else
+        {
+            cout << "Could not initialize video capturing: " << video << "\n";
+            return 1;
+        }
+
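+    // detect() requires frames of exactly this size; captured frames are resized to match below.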
         detector->setInputSize(Size(frameWidth, frameHeight));
 
-        Mat frame;
-        TickMeter tm;
-        String msg = "FPS: ";
-        while(waitKey(1) < 0) // Press any key to exit
+        cout << "Press 'SPACE' to save frame, any other key to exit..." << endl;
+        int nFrame = 0;
+        for (;;)
         {
             // Get frame
-            if (!cap.read(frame))
+            Mat frame;
+            if (!capture.read(frame))
             {
-                cerr << "No frames grabbed!\n";
+                cerr << "Can't grab frame! Stop\n";
                 break;
             }
 
+            resize(frame, frame, Size(frameWidth, frameHeight));
+
             // Inference
             Mat faces;
             tm.start();
             detector->detect(frame, faces);
             tm.stop();
 
+            Mat result = frame.clone();
             // Draw results on the input image
-            Mat result = visualize(frame, faces);
-            putText(result, msg + to_string(tm.getFPS()), Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
+            visualize(result, nFrame, faces, tm.getFPS());
 
             // Visualize results
             imshow("Live", result);
 
-            tm.reset();
+            int key = waitKey(1);
+            bool saveFrame = save;
+            if (key == ' ')
+            {
+                saveFrame = true;
+                key = 0;  // handled
+            }
+
+            if (saveFrame)
+            {
+                std::string frame_name = cv::format("frame_%05d.png", nFrame);
+                std::string result_name = cv::format("result_%05d.jpg", nFrame);
+                cout << "Saving '" << frame_name << "' and '" << result_name << "' ...\n";
+                imwrite(frame_name, frame);
+                imwrite(result_name, result);
+            }
+
+            ++nFrame;
+
+            if (key > 0)
+                break;
         }
+        cout << "Processed " << nFrame << " frames" << endl;
     }
-}
\ No newline at end of file
+    cout << "Done." << endl;
+    return 0;
+}
diff --git a/samples/dnn/face_detect.py b/samples/dnn/face_detect.py
index 65069d6..8900a7f 100644 (file)
@@ -12,90 +12,144 @@ def str2bool(v):
         raise NotImplementedError
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--input', '-i', type=str, help='Path to the input image.')
-parser.add_argument('--model', '-m', type=str, default='yunet.onnx', help='Path to the model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
+parser.add_argument('--image1', '-i1', type=str, help='Path to the input image1. Omit to detect on the default camera.')
+parser.add_argument('--image2', '-i2', type=str, help='Path to the input image2. When both image1 and image2 are given, the program tries to find a face in each image and runs the face recognition algorithm.')
+parser.add_argument('--video', '-v', type=str, help='Path to the input video.')
+parser.add_argument('--scale', '-sc', type=float, default=1.0, help='Scale factor used to resize input video frames.')
+parser.add_argument('--face_detection_model', '-fd', type=str, default='yunet.onnx', help='Path to the face detection model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
+parser.add_argument('--face_recognition_model', '-fr', type=str, default='face_recognizer_fast.onnx', help='Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view.')
 parser.add_argument('--score_threshold', type=float, default=0.9, help='Filtering out faces of score < score_threshold.')
 parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
 parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
 parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using a camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()
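+# Example invocation (file paths are illustrative):
+#   python face_detect.py --image1 image1.jpg --image2 image2.jpg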
 
-def visualize(input, faces, thickness=2):
-    output = input.copy()
+def visualize(input, faces, fps, thickness=2):
     if faces[1] is not None:
         for idx, face in enumerate(faces[1]):
             print('Face {}, top-left coordinates: ({:.0f}, {:.0f}), box width: {:.0f}, box height {:.0f}, score: {:.2f}'.format(idx, face[0], face[1], face[2], face[3], face[-1]))
 
             coords = face[:-1].astype(np.int32)
-            cv.rectangle(output, (coords[0], coords[1]), (coords[0]+coords[2], coords[1]+coords[3]), (0, 255, 0), 2)
-            cv.circle(output, (coords[4], coords[5]), 2, (255, 0, 0), 2)
-            cv.circle(output, (coords[6], coords[7]), 2, (0, 0, 255), 2)
-            cv.circle(output, (coords[8], coords[9]), 2, (0, 255, 0), 2)
-            cv.circle(output, (coords[10], coords[11]), 2, (255, 0, 255), 2)
-            cv.circle(output, (coords[12], coords[13]), 2, (0, 255, 255), 2)
-    return output
+            cv.rectangle(input, (coords[0], coords[1]), (coords[0]+coords[2], coords[1]+coords[3]), (0, 255, 0), thickness)
+            cv.circle(input, (coords[4], coords[5]), 2, (255, 0, 0), thickness)
+            cv.circle(input, (coords[6], coords[7]), 2, (0, 0, 255), thickness)
+            cv.circle(input, (coords[8], coords[9]), 2, (0, 255, 0), thickness)
+            cv.circle(input, (coords[10], coords[11]), 2, (255, 0, 255), thickness)
+            cv.circle(input, (coords[12], coords[13]), 2, (0, 255, 255), thickness)
+    cv.putText(input, 'FPS: {:.2f}'.format(fps), (1, 16), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
 
 if __name__ == '__main__':
 
-    # Instantiate FaceDetectorYN
+    ## [initialize_FaceDetectorYN]
     detector = cv.FaceDetectorYN.create(
-        args.model,
+        args.face_detection_model,
         "",
         (320, 320),
         args.score_threshold,
         args.nms_threshold,
         args.top_k
     )
+    ## [initialize_FaceDetectorYN]
+
+    tm = cv.TickMeter()
 
     # If input is an image
-    if args.input is not None:
-        image = cv.imread(args.input)
+    if args.image1 is not None:
+        img1 = cv.imread(cv.samples.findFile(args.image1))
 
+        tm.start()
+        ## [inference]
         # Set input size before inference
-        detector.setInputSize((image.shape[1], image.shape[0]))
+        detector.setInputSize((img1.shape[1], img1.shape[0]))
+
+        faces1 = detector.detect(img1)
+        ## [inference]
 
-        # Inference
-        faces = detector.detect(image)
+        tm.stop()
+        assert faces1[1] is not None, 'Cannot find a face in {}'.format(args.image1)
 
         # Draw results on the input image
-        result = visualize(image, faces)
+        visualize(img1, faces1, tm.getFPS())
 
         # Save results if save is true
         if args.save:
-            print('Resutls saved to result.jpg\n')
-            cv.imwrite('result.jpg', result)
+            print('Results saved to result.jpg\n')
+            cv.imwrite('result.jpg', img1)
 
         # Visualize results in a new window
-        if args.vis:
-            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
-            cv.imshow(args.input, result)
-            cv.waitKey(0)
+        cv.imshow("image1", img1)
+
+        if args.image2 is not None:
+            img2 = cv.imread(cv.samples.findFile(args.image2))
+
+            tm.reset()
+            tm.start()
+            detector.setInputSize((img2.shape[1], img2.shape[0]))
+            faces2 = detector.detect(img2)
+            tm.stop()
+            assert faces2[1] is not None, 'Cannot find a face in {}'.format(args.image2)
+            visualize(img2, faces2, tm.getFPS())
+            cv.imshow("image2", img2)
+
+            ## [initialize_FaceRecognizerSF]
+            recognizer = cv.FaceRecognizerSF.create(
+                args.face_recognition_model, "")
+            ## [initialize_FaceRecognizerSF]
+
+            ## [facerecognizer]
+            # Align faces
+            face1_align = recognizer.alignCrop(img1, faces1[1][0])
+            face2_align = recognizer.alignCrop(img2, faces2[1][0])
+
+            # Extract features
+            face1_feature = recognizer.feature(face1_align)
+            face2_feature = recognizer.feature(face2_align)
+            ## [facerecognizer]
+
+            cosine_similarity_threshold = 0.363
+            l2_similarity_threshold = 1.128
+
+            ## [match]
+            cosine_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_COSINE)
+            l2_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_NORM_L2)
+            ## [match]
+
+            msg = 'different identities'
+            if cosine_score >= cosine_similarity_threshold:
+                msg = 'the same identity'
+            print('They have {}. Cosine Similarity: {}, threshold: {} (higher value means higher similarity, max 1.0).'.format(msg, cosine_score, cosine_similarity_threshold))
+
+            msg = 'different identities'
+            if l2_score <= l2_similarity_threshold:
+                msg = 'the same identity'
+            print('They have {}. NormL2 Distance: {}, threshold: {} (lower value means higher similarity, min 0.0).'.format(msg, l2_score, l2_similarity_threshold))
+        cv.waitKey(0)
     else: # Omit input to call default camera
-        deviceId = 0
+        if args.video is not None:
+            deviceId = args.video
+        else:
+            deviceId = 0
         cap = cv.VideoCapture(deviceId)
-        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
-        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
+        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH)*args.scale)
+        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)*args.scale)
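+        # detect() requires frames of exactly this size, hence the cv.resize() call in the loop below.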
         detector.setInputSize([frameWidth, frameHeight])
 
-        tm = cv.TickMeter()
         while cv.waitKey(1) < 0:
             hasFrame, frame = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break
 
+            frame = cv.resize(frame, (frameWidth, frameHeight))
+
             # Inference
             tm.start()
             faces = detector.detect(frame) # faces is a tuple
             tm.stop()
 
             # Draw results on the input image
-            frame = visualize(frame, faces)
+            visualize(frame, faces, tm.getFPS())
 
-            cv.putText(frame, 'FPS: {}'.format(tm.getFPS()), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
-
-            # Visualize results in a new Window
+            # Visualize results
             cv.imshow('Live', frame)
-
-            tm.reset()
\ No newline at end of file
+    cv.destroyAllWindows()
diff --git a/samples/dnn/face_match.cpp b/samples/dnn/face_match.cpp
deleted file mode 100644 (file)
index f24134b..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-
-#include "opencv2/dnn.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/highgui.hpp"
-
-#include <iostream>
-
-#include "opencv2/objdetect.hpp"
-
-
-using namespace cv;
-using namespace std;
-
-
-int main(int argc, char ** argv)
-{
-    if (argc != 5)
-    {
-        std::cerr << "Usage " << argv[0] << ": "
-                  << "<det_onnx_path> "
-                  << "<reg_onnx_path> "
-                  << "<image1>"
-                  << "<image2>\n";
-        return -1;
-    }
-
-    String det_onnx_path = argv[1];
-    String reg_onnx_path = argv[2];
-    String image1_path = argv[3];
-    String image2_path = argv[4];
-    std::cout<<image1_path<<" "<<image2_path<<std::endl;
-    Mat image1 = imread(image1_path);
-    Mat image2 = imread(image2_path);
-
-    float score_thresh = 0.9f;
-    float nms_thresh = 0.3f;
-    double cosine_similar_thresh = 0.363;
-    double l2norm_similar_thresh = 1.128;
-    int top_k = 5000;
-
-    // Initialize FaceDetector
-    Ptr<FaceDetectorYN> faceDetector;
-
-    faceDetector = FaceDetectorYN::create(det_onnx_path, "", image1.size(), score_thresh, nms_thresh, top_k);
-    Mat faces_1;
-    faceDetector->detect(image1, faces_1);
-    if (faces_1.rows < 1)
-    {
-        std::cerr << "Cannot find a face in " << image1_path << "\n";
-        return -1;
-    }
-
-    faceDetector = FaceDetectorYN::create(det_onnx_path, "", image2.size(), score_thresh, nms_thresh, top_k);
-    Mat faces_2;
-    faceDetector->detect(image2, faces_2);
-    if (faces_2.rows < 1)
-    {
-        std::cerr << "Cannot find a face in " << image2_path << "\n";
-        return -1;
-    }
-
-    // Initialize FaceRecognizerSF
-    Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(reg_onnx_path, "");
-
-
-    Mat aligned_face1, aligned_face2;
-    faceRecognizer->alignCrop(image1, faces_1.row(0), aligned_face1);
-    faceRecognizer->alignCrop(image2, faces_2.row(0), aligned_face2);
-
-    Mat feature1, feature2;
-    faceRecognizer->feature(aligned_face1, feature1);
-    feature1 = feature1.clone();
-    faceRecognizer->feature(aligned_face2, feature2);
-    feature2 = feature2.clone();
-
-    double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
-    double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
-
-    if(cos_score >= cosine_similar_thresh)
-    {
-        std::cout << "They have the same identity;";
-    }
-    else
-    {
-        std::cout << "They have different identities;";
-    }
-    std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";
-
-    if(L2_score <= l2norm_similar_thresh)
-    {
-        std::cout << "They have the same identity;";
-    }
-    else
-    {
-        std::cout << "They have different identities.";
-    }
-    std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
-
-    return 0;
-}
diff --git a/samples/dnn/face_match.py b/samples/dnn/face_match.py
deleted file mode 100644 (file)
index 916c76a..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-import argparse
-
-import numpy as np
-import cv2 as cv
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--input1', '-i1', type=str, help='Path to the input image1.')
-parser.add_argument('--input2', '-i2', type=str, help='Path to the input image2.')
-parser.add_argument('--face_detection_model', '-fd', type=str, help='Path to the face detection model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
-parser.add_argument('--face_recognition_model', '-fr', type=str, help='Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view.')
-args = parser.parse_args()
-
-# Read the input image
-img1 = cv.imread(args.input1)
-img2 = cv.imread(args.input2)
-
-# Instantiate face detector and recognizer
-detector = cv.FaceDetectorYN.create(
-    args.face_detection_model,
-    "",
-    (img1.shape[1], img1.shape[0])
-)
-recognizer = cv.FaceRecognizerSF.create(
-    args.face_recognition_model,
-    ""
-)
-
-# Detect face
-detector.setInputSize((img1.shape[1], img1.shape[0]))
-face1 = detector.detect(img1)
-detector.setInputSize((img2.shape[1], img2.shape[0]))
-face2 = detector.detect(img2)
-assert face1[1].shape[0] > 0, 'Cannot find a face in {}'.format(args.input1)
-assert face2[1].shape[0] > 0, 'Cannot find a face in {}'.format(args.input2)
-
-# Align faces
-face1_align = recognizer.alignCrop(img1, face1[1][0])
-face2_align = recognizer.alignCrop(img2, face2[1][0])
-
-# Extract features
-face1_feature = recognizer.feature(face1_align)
-face2_feature = recognizer.feature(face2_align)
-
-# Calculate distance (0: cosine, 1: L2)
-cosine_similarity_threshold = 0.363
-cosine_score = recognizer.match(face1_feature, face2_feature, 0)
-msg = 'different identities'
-if cosine_score >= cosine_similarity_threshold:
-    msg = 'the same identity'
-print('They have {}. Cosine Similarity: {}, threshold: {} (higher value means higher similarity, max 1.0).'.format(msg, cosine_score, cosine_similarity_threshold))
-
-l2_similarity_threshold = 1.128
-l2_score = recognizer.match(face1_feature, face2_feature, 1)
-msg = 'different identities'
-if l2_score <= l2_similarity_threshold:
-    msg = 'the same identity'
-print('They have {}. NormL2 Distance: {}, threshold: {} (lower value means higher similarity, min 0.0).'.format(msg, l2_score, l2_similarity_threshold))