From: Orest Chura
Date: Tue, 17 Dec 2019 08:00:49 +0000 (+0300)
Subject: Merge pull request #15942 from OrestChura:fb_tutorial
X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~1^2~10
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=287874a444326f44a501fe225ea89fd1a49a94df;p=platform%2Fupstream%2Fopencv.git

Merge pull request #15942 from OrestChura:fb_tutorial

G-API: Tutorial: Face beautification algorithm implementation

* Introduce a tutorial on face beautification algorithm

- small typo issue in render_ocv.cpp

* Addressing comments rgarnov smirnov-alexey
---

diff --git a/doc/tutorials/gapi/face_beautification/face_beautification.markdown b/doc/tutorials/gapi/face_beautification/face_beautification.markdown
new file mode 100644
index 0000000..e219cee
--- /dev/null
+++ b/doc/tutorials/gapi/face_beautification/face_beautification.markdown
@@ -0,0 +1,440 @@
+# Implementing a face beautification algorithm with G-API {#tutorial_gapi_face_beautification}
+
+[TOC]
+
+# Introduction {#gapi_fb_intro}
+
+In this tutorial you will learn:
+* Basics of a sample face beautification algorithm;
+* How to infer different networks inside a pipeline with G-API;
+* How to run a G-API pipeline on a video stream.
+
+## Prerequisites {#gapi_fb_prerec}
+
+This sample requires:
+- A PC with GNU/Linux or Microsoft Windows (Apple macOS is supported but
+  was not tested);
+- OpenCV 4.2 or later built with Intel® Distribution of [OpenVINO™
+  Toolkit](https://docs.openvinotoolkit.org/) (building with [Intel®
+  TBB](https://www.threadingbuildingblocks.org/intel-tbb-tutorial) is
+  a plus);
+- The following topologies from OpenVINO™ Toolkit [Open Model
+  Zoo](https://github.com/opencv/open_model_zoo):
+  - `face-detection-adas-0001`;
+  - `facial-landmarks-35-adas-0002`.
+
+## Face beautification algorithm {#gapi_fb_algorithm}
+
+We will implement a simple face beautification algorithm using a
+combination of modern Deep Learning techniques and traditional
+Computer Vision. The general idea behind the algorithm is to make
+the face skin smoother while preserving the contrast of face features
+like the eyes or the mouth.
The algorithm identifies parts of the face using a DNN +inference, applies different filters to the parts found, and then +combines it into the final result using basic image arithmetics: + +\dot +strict digraph Pipeline { + node [shape=record fontname=Helvetica fontsize=10 style=filled color="#4c7aa4" fillcolor="#5b9bd5" fontcolor="white"]; + edge [color="#62a8e7"]; + ordering="out"; + splines=ortho; + rankdir=LR; + + input [label="Input"]; + fd [label="Face\ndetector"]; + bgMask [label="Generate\nBG mask"]; + unshMask [label="Unsharp\nmask"]; + bilFil [label="Bilateral\nfilter"]; + shMask [label="Generate\nsharp mask"]; + blMask [label="Generate\nblur mask"]; + mul_1 [label="*" fontsize=24 shape=circle labelloc=b]; + mul_2 [label="*" fontsize=24 shape=circle labelloc=b]; + mul_3 [label="*" fontsize=24 shape=circle labelloc=b]; + + subgraph cluster_0 { + style=dashed + fontsize=10 + ld [label="Landmarks\ndetector"]; + label="for each face" + } + + sum_1 [label="+" fontsize=24 shape=circle]; + out [label="Output"]; + + temp_1 [style=invis shape=point width=0]; + temp_2 [style=invis shape=point width=0]; + temp_3 [style=invis shape=point width=0]; + temp_4 [style=invis shape=point width=0]; + temp_5 [style=invis shape=point width=0]; + temp_6 [style=invis shape=point width=0]; + temp_7 [style=invis shape=point width=0]; + temp_8 [style=invis shape=point width=0]; + temp_9 [style=invis shape=point width=0]; + + input -> temp_1 [arrowhead=none] + temp_1 -> fd -> ld + ld -> temp_4 [arrowhead=none] + temp_4 -> bgMask + bgMask -> mul_1 -> sum_1 -> out + + temp_4 -> temp_5 -> temp_6 [arrowhead=none constraint=none] + ld -> temp_2 -> temp_3 [style=invis constraint=none] + + temp_1 -> {unshMask, bilFil} + fd -> unshMask [style=invis constraint=none] + unshMask -> bilFil [style=invis constraint=none] + + bgMask -> shMask [style=invis constraint=none] + shMask -> blMask [style=invis constraint=none] + mul_1 -> mul_2 [style=invis constraint=none] + temp_5 -> shMask -> mul_2 + temp_6 -> blMask -> mul_3 + + unshMask -> temp_2 -> temp_5 [style=invis] + bilFil -> temp_3 -> temp_6 [style=invis] + + mul_2 -> temp_7 [arrowhead=none] + mul_3 -> temp_8 [arrowhead=none] + + temp_8 -> temp_7 [arrowhead=none constraint=none] + temp_7 -> sum_1 [constraint=none] + + unshMask -> mul_2 [constraint=none] + bilFil -> mul_3 [constraint=none] + temp_1 -> mul_1 [constraint=none] +} +\enddot + +Briefly the algorithm is described as follows: +- Input image \f$I\f$ is passed to unsharp mask and bilateral filters + (\f$U\f$ and \f$L\f$ respectively); +- Input image \f$I\f$ is passed to an SSD-based face detector; +- SSD result (a \f$[1 \times 1 \times 200 \times 7]\f$ blob) is parsed + and converted to an array of faces; +- Every face is passed to a landmarks detector; +- Based on landmarks found for every face, three image masks are + generated: + - A background mask \f$b\f$ -- indicating which areas from the + original image to keep as-is; + - A face part mask \f$p\f$ -- identifying regions to preserve + (sharpen). + - A face skin mask \f$s\f$ -- identifying regions to blur; +- The final result \f$O\f$ is a composition of features above + calculated as \f$O = b*I + p*U + s*L\f$. + +Generating face element masks based on a limited set of features (just +35 per face, including all its parts) is not very trivial and is +described in the sections below. + +# Constructing a G-API pipeline {#gapi_fb_pipeline} + +## Declaring Deep Learning topologies {#gapi_fb_decl_nets} + +This sample is using two DNN detectors. 
Every network takes one input and produces one output. In G-API,
+networks are defined with the macro G_API_NET():
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp net_decl
+
+To get more information, see
+[Declaring Deep Learning topologies](@ref gapi_ifd_declaring_nets)
+described in the "Face Analytics pipeline" tutorial.
+
+## Describing the processing graph {#gapi_fb_ppline}
+
+The code below generates a graph for the algorithm above:
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp ppl
+
+The resulting graph is a mixture of G-API's standard operations,
+user-defined operations (namespace `custom::`), and DNN inference.
+The generic function `cv::gapi::infer<>()` triggers inference
+within the pipeline; the networks to infer are specified as template
+parameters. The sample code uses two versions of `cv::gapi::infer<>()`:
+- A frame-oriented one is used to detect faces on the input frame.
+- An ROI-list oriented one is used to run landmarks inference on a
+  list of faces -- this version produces an array of landmarks per
+  face.
+
+More on this in "Face Analytics pipeline"
+([Building a GComputation](@ref gapi_ifd_gcomputation) section).
+
+## Unsharp mask in G-API {#gapi_fb_unsh}
+
+The unsharp mask \f$U\f$ for image \f$I\f$ is defined as:
+
+\f[U = I - s * L(M(I)),\f]
+
+where \f$M()\f$ is a median filter, \f$L()\f$ is the Laplace operator,
+and \f$s\f$ is a strength coefficient. While G-API doesn't provide
+this function out-of-the-box, it is expressed naturally with the
+existing G-API operations:
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp unsh
+
+Note that the code snippet above is a regular C++ function defined
+with G-API types. Users can write functions like this to simplify
+graph construction; when called, such a function just adds the relevant
+nodes to the pipeline where it is used.
+
+# Custom operations {#gapi_fb_proc}
+
+The face beautification graph uses custom operations
+extensively. This chapter focuses on the most interesting kernels;
+refer to [G-API Kernel API](@ref gapi_kernel_api) for general
+information on defining operations and implementing kernels in G-API.
+
+## Face detector post-processing {#gapi_fb_face_detect}
+
+The face detector output is converted to an array of faces with the
+following kernel:
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp vec_ROI
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp fd_pp
+
+## Facial landmarks post-processing {#gapi_fb_landm_detect}
+
+The algorithm infers the locations of face elements (such as the eyes, the
+mouth and the head contour itself) using a generic facial landmarks detector
+from OpenVINO™ Open Model Zoo. However, the detected landmarks as such are
+not enough to generate masks --- this operation requires regions of interest
+on the face represented by closed contours, so some interpolation is applied
+to get them. This landmark processing and interpolation is performed by the
+following kernel:
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp ld_pp_cnts
+
+The kernel takes two arrays of denormalized landmark coordinates and
+returns an array of face elements' closed contours and an array of faces'
+closed contours; in other words, the first output is an array of contours
+of image areas to be sharpened and the second is an array of contours of
+areas to be smoothed.
+
+Here and below, `Contour` is a vector of points.
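+
+For reference, below is a minimal sketch of the helper aliases used by the
+sample code; the exact definitions live in the sample sources, and the
+element types shown here (`cv::Rect`, `cv::Point`) are an assumption made
+for illustration:
+
+```cpp
+#include <vector>
+#include <opencv2/core.hpp>          // cv::Rect, cv::Point
+#include <opencv2/gapi/garray.hpp>   // cv::GArray<>
+
+using VectorROI = std::vector<cv::Rect>;   // a plain list of face boxes
+using GArrayROI = cv::GArray<cv::Rect>;    // the same list as a G-API graph type
+using Contour   = std::vector<cv::Point>;  // a closed contour of consecutive points
+using Landmarks = std::vector<cv::Point>;  // raw landmark points for one face
+```
+
+Each `Contour` can be passed directly to drawing functions such as
+`cv::fillPoly()` or `cv::polylines()`, which is how the masks are built in
+the sections below.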
+
+### Getting an eye contour {#gapi_fb_ld_eye}
+
+Eye contours are estimated with the following function:
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp ld_pp_incl
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp ld_pp_eye
+
+Briefly, this function restores the bottom side of an eye with a
+half-ellipse based on two points at the left and right eye
+corners. In fact, `cv::ellipse2Poly()` is used to approximate the eye region,
+and the function only defines the ellipse parameters based on just two points:
+- The ellipse center and the \f$X\f$ half-axis calculated from the two eye
+  corner points;
+- The \f$Y\f$ half-axis calculated according to the assumption that an
+  average eye height is about \f$1/3\f$ of its width;
+- The start and the end angles, which are 0 and 180 (refer to the
+  `cv::ellipse()` documentation);
+- The angle delta: how many points to produce in the contour;
+- The inclination angle of the axes.
+
+Using `atan2()` instead of just `atan()` in the function
+`custom::getLineInclinationAngleDegrees()` is essential, as it returns an
+angle whose sign depends on the signs of `x` and `y`, so we get the right
+angle even in case of an upside-down face arrangement
+(if we put the points in the right order, of course).
+
+### Getting a forehead contour {#gapi_fb_ld_fhd}
+
+The function approximates the forehead contour:
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp ld_pp_fhd
+
+As the detected landmarks contain only jaw points, we have to derive a
+half-ellipse from three points of a jaw: the leftmost, the
+rightmost and the lowest one. The jaw width is assumed to be equal to the
+forehead width, and the latter is calculated using the left and the
+right points. For the \f$Y\f$ axis we have no points to derive it directly,
+so we assume instead that the forehead height is about \f$2/3\f$
+of the jaw height, which can be figured out from the face center (the
+middle between the left and right points) and the lowest jaw point.
+
+## Drawing masks {#gapi_fb_masks_drw}
+
+When we have all the contours needed, we can draw the masks:
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp msk_ppline
+
+The steps to get the masks are:
+* the "sharp" mask calculation:
+  * fill the contours that should be sharpened;
+  * blur that to get the "sharp" mask (`mskSharpG`);
+* the "bilateral" mask calculation:
+  * fill all the face contours fully;
+  * blur that;
+  * subtract areas which intersect with the "sharp" mask --- and get the
+    "bilateral" mask (`mskBlurFinal`);
+* the background mask calculation:
+  * add the two previous masks;
+  * set all non-zero pixels of the result to 255 (by `cv::gapi::threshold()`);
+  * invert the output (by `cv::gapi::bitwise_not`) to get the background
+    mask (`mskNoFaces`).
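+
+To make the list above more concrete, here is a rough, non-G-API sketch of
+the same mask arithmetic using plain OpenCV calls on `cv::Mat`. The function
+name `buildMasks`, its parameters and the Gaussian kernel size are
+hypothetical and used for illustration only; the actual graph version is the
+snippet above.
+
+```cpp
+#include <vector>
+#include <opencv2/core.hpp>
+#include <opencv2/imgproc.hpp>
+
+void buildMasks(const cv::Size &sz,
+                const std::vector<std::vector<cv::Point>> &elemContours,
+                const std::vector<std::vector<cv::Point>> &faceContours,
+                cv::Mat &mskSharpG, cv::Mat &mskBlurFinal, cv::Mat &mskNoFaces)
+{
+    const cv::Size kernel(5, 5);                          // assumed blur kernel size
+    // "Sharp" mask: fill the face-element contours, then blur the result
+    cv::Mat mskSharp = cv::Mat::zeros(sz, CV_8UC1);
+    cv::fillPoly(mskSharp, elemContours, cv::Scalar(255));
+    cv::GaussianBlur(mskSharp, mskSharpG, kernel, 0.0);
+    // "Bilateral" mask: fill whole faces, blur, drop the overlap with the sharp mask
+    cv::Mat mskBlur = cv::Mat::zeros(sz, CV_8UC1);
+    cv::fillPoly(mskBlur, faceContours, cv::Scalar(255));
+    cv::Mat mskBlurG;
+    cv::GaussianBlur(mskBlur, mskBlurG, kernel, 0.0);
+    cv::Mat overlap = cv::Mat::zeros(sz, CV_8UC1);
+    mskBlurG.copyTo(overlap, mskSharpG);                  // keep values where the sharp mask is non-zero
+    mskBlurFinal = mskBlurG - overlap;
+    // Background mask: everything which is not a face
+    cv::Mat mskFaces = mskBlurFinal + mskSharpG;
+    cv::threshold(mskFaces, mskFaces, 0, 255, cv::THRESH_BINARY);
+    cv::bitwise_not(mskFaces, mskNoFaces);
+}
+```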
+
+# Configuring and running the pipeline {#gapi_fb_comp_args}
+
+Once the graph is fully expressed, we can finally compile it and run it
+on real data. G-API graph compilation is the stage where the G-API
+framework actually understands which kernels and networks to use. This
+configuration happens via G-API compilation arguments.
+
+## DNN parameters {#gapi_fb_comp_args_net}
+
+This sample uses the OpenVINO™ Toolkit Inference Engine backend for DL
+inference, which is configured as follows:
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp net_param
+
+Every `cv::gapi::ie::Params<>` object is related to the network
+specified in its template argument; there we should pass the network
+type we have defined with `G_API_NET()` at the beginning of the
+tutorial.
+
+Network parameters are then wrapped in `cv::gapi::NetworkPackage`:
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp netw
+
+More details can be found in "Face Analytics Pipeline"
+([Configuring the pipeline](@ref gapi_ifd_configuration) section).
+
+## Kernel packages {#gapi_fb_comp_args_kernels}
+
+In this example we use a lot of custom kernels; in addition, we use the
+Fluid backend to optimize memory for G-API's standard kernels
+where applicable. The resulting kernel package is formed like this:
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp kern_pass_1
+
+## Compiling the streaming pipeline {#gapi_fb_compiling}
+
+G-API optimizes execution for video streams when compiled in the
+"Streaming" mode.
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp str_comp
+
+More on this in "Face Analytics Pipeline"
+([Configuring the pipeline](@ref gapi_ifd_configuration) section).
+
+## Running the streaming pipeline {#gapi_fb_running}
+
+In order to run the G-API streaming pipeline, all we need is to
+specify the input video source, call
+`cv::GStreamingCompiled::start()`, and then fetch the pipeline
+processing results:
+
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp str_src
+@snippet cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp str_loop
+
+Once the results are ready and can be pulled from the pipeline, we display
+them on the screen and handle GUI events.
+
+See the [Running the pipeline](@ref gapi_ifd_running) section
+in the "Face Analytics Pipeline" tutorial for more details.
+
+# Conclusion {#gapi_fb_cncl}
+
+The tutorial has two goals: to show the use of brand new features of
+G-API introduced in OpenCV 4.2, and to give a basic understanding of a
+sample face beautification algorithm.
+
+The result of the algorithm application:
+
+![Face Beautification example](pics/example.jpg)
+
+On the test machine (Intel® Core™ i7-8700) the G-API-optimized video
+pipeline outperforms its serial (non-pipelined) version by a factor of
+**2.7** -- meaning that for such a non-trivial graph, proper
+pipelining can bring an almost 3x increase in performance.
+
+
diff --git a/doc/tutorials/gapi/face_beautification/pics/example.jpg b/doc/tutorials/gapi/face_beautification/pics/example.jpg
new file mode 100644
index 0000000..99a538f
Binary files /dev/null and b/doc/tutorials/gapi/face_beautification/pics/example.jpg differ
diff --git a/doc/tutorials/gapi/table_of_content_gapi.markdown b/doc/tutorials/gapi/table_of_content_gapi.markdown
index 96e395c..f9d7b03 100644
--- a/doc/tutorials/gapi/table_of_content_gapi.markdown
+++ b/doc/tutorials/gapi/table_of_content_gapi.markdown
@@ -29,3 +29,14 @@ how G-API module can be used for that.
  is ported on G-API, covering the basic intuition behind this
  transition process, and examining benefits which a graph model
  brings there.
+
+- @subpage tutorial_gapi_face_beautification
+
+  *Languages:* C++
+
+  *Compatibility:* \> OpenCV 4.2
+
+  *Author:* Orest Chura
+
+  In this tutorial we build a complex hybrid Computer Vision/Deep
+  Learning video processing pipeline with G-API.
diff --git a/modules/gapi/src/api/render_ocv.cpp b/modules/gapi/src/api/render_ocv.cpp index d87581e..4aa2388 100644 --- a/modules/gapi/src/api/render_ocv.cpp +++ b/modules/gapi/src/api/render_ocv.cpp @@ -197,7 +197,7 @@ void drawPrimitivesOCV(cv::Mat& in, const auto& ftp = cv::util::get(p); const auto color = converter.cvtColor(ftp.color); - GAPI_Assert(ftpr && "I must pass cv::gapi::wip::draw::freetype_font" + GAPI_Assert(ftpr && "You must pass cv::gapi::wip::draw::freetype_font" " to the graph compile arguments"); int baseline = 0; auto size = ftpr->getTextSize(ftp.text, ftp.fh, &baseline); diff --git a/modules/gapi/samples/face_beautification.cpp b/samples/cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp similarity index 61% rename from modules/gapi/samples/face_beautification.cpp rename to samples/cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp index 9966997..e42de86 100644 --- a/modules/gapi/samples/face_beautification.cpp +++ b/samples/cpp/tutorial_code/gapi/face_beautification/face_beautification.cpp @@ -4,6 +4,9 @@ // // Copyright (C) 2018-2019 Intel Corporation +#include "opencv2/opencv_modules.hpp" +#if defined(HAVE_OPENCV_GAPI) + #include #include #include @@ -11,16 +14,31 @@ #include #include #include -#include "opencv2/gapi/streaming/cap.hpp" +#include -#include -#include -#include +#include // windows namespace config { constexpr char kWinFaceBeautification[] = "FaceBeautificator"; constexpr char kWinInput[] = "Input"; +constexpr char kParserAbout[] = + "Use this script to run the face beautification algorithm with G-API."; +constexpr char kParserOptions[] = +"{ help h || print the help message. }" + +"{ facepath f || a path to a Face detection model file (.xml).}" +"{ facedevice |GPU| the face detection computation device.}" + +"{ landmpath l || a path to a Landmarks detection model file (.xml).}" +"{ landmdevice |CPU| the landmarks detection computation device.}" + +"{ input i || a path to an input. Skip to capture from a camera.}" +"{ boxes b |false| set true to draw face Boxes in the \"Input\" window.}" +"{ landmarks m |false| set true to draw landMarks in the \"Input\" window.}" +"{ streaming s |true| set false to disable stream pipelining.}" +"{ performance p |false| set true to disable output displaying.}"; + const cv::Scalar kClrWhite (255, 255, 255); const cv::Scalar kClrGreen ( 0, 255, 0); const cv::Scalar kClrYellow( 0, 255, 255); @@ -36,13 +54,13 @@ constexpr int kUnshSigma = 3; constexpr float kUnshStrength = 0.7f; constexpr int kAngDelta = 1; constexpr bool kClosedLine = true; - -const size_t kNumPointsInHalfEllipse = 180 / config::kAngDelta + 1; } // namespace config namespace { +//! [vec_ROI] using VectorROI = std::vector; +//! [vec_ROI] using GArrayROI = cv::GArray; using Contour = std::vector; using Landmarks = std::vector; @@ -54,10 +72,35 @@ template inline int toIntRounded(const Tp x) return static_cast(std::lround(x)); } +//! [toDbl] template inline double toDouble(const Tp x) { return static_cast(x); } +//! 
[toDbl] + +struct Avg { + struct Elapsed { + explicit Elapsed(double ms) : ss(ms / 1000.), + mm(toIntRounded(ss / 60)) {} + const double ss; + const int mm; + }; + + using MS = std::chrono::duration>; + using TS = std::chrono::time_point; + TS started; + + void start() { started = now(); } + TS now() const { return std::chrono::high_resolution_clock::now(); } + double tick() const { return std::chrono::duration_cast(now() - started).count(); } + Elapsed elapsed() const { return Elapsed{tick()}; } + double fps(std::size_t n) const { return static_cast(n) / (tick() / 1000.); } + }; +std::ostream& operator<<(std::ostream &os, const Avg::Elapsed &e) { + os << e.mm << ':' << (e.ss - 60*e.mm); + return os; +} std::string getWeightsPath(const std::string &mdlXMLPath) // mdlXMLPath = // "The/Full/Path.xml" @@ -77,31 +120,28 @@ namespace custom { using TplPtsFaceElements_Jaw = std::tuple, cv::GArray>; -using TplFaces_FaceElements = std::tuple, - cv::GArray>; // Wrapper-functions inline int getLineInclinationAngleDegrees(const cv::Point &ptLeft, const cv::Point &ptRight); inline Contour getForeheadEllipse(const cv::Point &ptJawLeft, const cv::Point &ptJawRight, - const cv::Point &ptJawMiddle, - const size_t capacity); + const cv::Point &ptJawMiddle); inline Contour getEyeEllipse(const cv::Point &ptLeft, - const cv::Point &ptRight, - const size_t capacity); + const cv::Point &ptRight); inline Contour getPatchedEllipse(const cv::Point &ptLeft, const cv::Point &ptRight, const cv::Point &ptUp, const cv::Point &ptDown); // Networks +//! [net_decl] G_API_NET(FaceDetector, , "face_detector"); G_API_NET(LandmDetector, , "landm_detector"); +//! [net_decl] // Function kernels -G_TYPED_KERNEL(GBilatFilter, - , +G_TYPED_KERNEL(GBilatFilter, , "custom.faceb12n.bilateralFilter") { static cv::GMatDesc outMeta(cv::GMatDesc in, int,double,double) @@ -110,8 +150,7 @@ G_TYPED_KERNEL(GBilatFilter, } }; -G_TYPED_KERNEL(GLaplacian, - , +G_TYPED_KERNEL(GLaplacian, , "custom.faceb12n.Laplacian") { static cv::GMatDesc outMeta(cv::GMatDesc in, int) @@ -120,8 +159,7 @@ G_TYPED_KERNEL(GLaplacian, } }; -G_TYPED_KERNEL(GFillPolyGContours, - )>, +G_TYPED_KERNEL(GFillPolyGContours, )>, "custom.faceb12n.fillPolyGContours") { static cv::GMatDesc outMeta(cv::GMatDesc in, cv::GArrayDesc) @@ -130,8 +168,8 @@ G_TYPED_KERNEL(GFillPolyGContours, } }; -G_TYPED_KERNEL(GPolyLines, - ,bool,cv::Scalar)>, +G_TYPED_KERNEL(GPolyLines, ,bool, + cv::Scalar)>, "custom.faceb12n.polyLines") { static cv::GMatDesc outMeta(cv::GMatDesc in, cv::GArrayDesc,bool,cv::Scalar) @@ -140,8 +178,7 @@ G_TYPED_KERNEL(GPolyLines, } }; -G_TYPED_KERNEL(GRectangle, - , +G_TYPED_KERNEL(GRectangle, , "custom.faceb12n.rectangle") { static cv::GMatDesc outMeta(cv::GMatDesc in, cv::GArrayDesc,cv::Scalar) @@ -150,8 +187,7 @@ G_TYPED_KERNEL(GRectangle, } }; -G_TYPED_KERNEL(GFacePostProc, - , +G_TYPED_KERNEL(GFacePostProc, , "custom.faceb12n.faceDetectPostProc") { static cv::GArrayDesc outMeta(const cv::GMatDesc&,const cv::GMatDesc&,float) @@ -160,8 +196,8 @@ G_TYPED_KERNEL(GFacePostProc, } }; -G_TYPED_KERNEL_M(GLandmPostProc, - ,GArrayROI)>, +G_TYPED_KERNEL_M(GLandmPostProc, , + GArrayROI)>, "custom.faceb12n.landmDetectPostProc") { static std::tuple outMeta( @@ -171,17 +207,17 @@ G_TYPED_KERNEL_M(GLandmPostProc, } }; -G_TYPED_KERNEL_M(GGetContours, - , - cv::GArray)>, +//! 
[kern_m_decl] +using TplFaces_FaceElements = std::tuple, cv::GArray>; +G_TYPED_KERNEL_M(GGetContours, , cv::GArray)>, "custom.faceb12n.getContours") { - static std::tuple outMeta( - const cv::GArrayDesc&,const cv::GArrayDesc&) + static std::tuple outMeta(const cv::GArrayDesc&,const cv::GArrayDesc&) { return std::make_tuple(cv::empty_array_desc(), cv::empty_array_desc()); } }; +//! [kern_m_decl] // OCV_Kernels @@ -262,11 +298,12 @@ GAPI_OCV_KERNEL(GCPURectangle, custom::GRectangle) // A face detector outputs a blob with the shape: [1, 1, N, 7], where N is // the number of detected bounding boxes. Structure of an output for every // detected face is the following: -// [image_id, label, conf, x_min, y_min, x_max, y_max]; all the seven elements +// [image_id, label, conf, x_min, y_min, x_max, y_max], all the seven elements // are floating point. For more details please visit: -// https://github.com/opencv/open_model_zoo/blob/master/intel_models/face-detection-adas-0001 +// https://github.com/opencv/open_model_zoo/blob/master/intel_models/face-detection-adas-0001 // This kernel is the face detection output blob parsing that returns a vector // of detected faces' rects: +//! [fd_pp] GAPI_OCV_KERNEL(GCPUFacePostProc, GFacePostProc) { static void run(const cv::Mat &inDetectResult, @@ -289,12 +326,17 @@ GAPI_OCV_KERNEL(GCPUFacePostProc, GFacePostProc) break; } const float faceConfidence = data[i * kObjectSize + 2]; + // We can cut detections by the `conf` field + // to avoid mistakes of the detector. if (faceConfidence > faceConfThreshold) { const float left = data[i * kObjectSize + 3]; const float top = data[i * kObjectSize + 4]; const float right = data[i * kObjectSize + 5]; const float bottom = data[i * kObjectSize + 6]; + // These are normalized coordinates and are between 0 and 1; + // to get the real pixel coordinates we should multiply it by + // the image sizes respectively to the directions: cv::Point tl(toIntRounded(left * imgCols), toIntRounded(top * imgRows)); cv::Point br(toIntRounded(right * imgCols), @@ -304,10 +346,18 @@ GAPI_OCV_KERNEL(GCPUFacePostProc, GFacePostProc) } } }; +//! [fd_pp] // This kernel is the facial landmarks detection output Mat parsing for every // detected face; returns a tuple containing a vector of vectors of // face elements' Points and a vector of vectors of jaw's Points: +// There are 35 landmarks given by the default detector for each face +// in a frame; the first 18 of them are face elements (eyes, eyebrows, +// a nose, a mouth) and the last 17 - a jaw contour. The detector gives +// floating point values for landmarks' normed coordinates relatively +// to an input ROI (not the original frame). +// For more details please visit: +// https://github.com/opencv/open_model_zoo/blob/master/intel_models/facial-landmarks-35-adas-0002 GAPI_OCV_KERNEL(GCPULandmPostProc, GLandmPostProc) { static void run(const std::vector &vctDetectResults, @@ -315,13 +365,6 @@ GAPI_OCV_KERNEL(GCPULandmPostProc, GLandmPostProc) std::vector &vctPtsFaceElems, std::vector &vctCntJaw) { - // There are 35 landmarks given by the default detector for each face - // in a frame; the first 18 of them are face elements (eyes, eyebrows, - // a nose, a mouth) and the last 17 - a jaw contour. The detector gives - // floating point values for landmarks' normed coordinates relatively - // to an input ROI (not the original frame). 
- // For more details please visit: - // https://github.com/opencv/open_model_zoo/blob/master/intel_models/facial-landmarks-35-adas-0002 static constexpr int kNumFaceElems = 18; static constexpr int kNumTotal = 35; const size_t numFaces = vctRects.size(); @@ -342,10 +385,8 @@ GAPI_OCV_KERNEL(GCPULandmPostProc, GLandmPostProc) ptsFaceElems.clear(); for (int j = 0; j < kNumFaceElems * 2; j += 2) { - cv::Point pt = - cv::Point(toIntRounded(data[j] * vctRects[i].width), - toIntRounded(data[j+1] * vctRects[i].height)) - + vctRects[i].tl(); + cv::Point pt = cv::Point(toIntRounded(data[j] * vctRects[i].width), + toIntRounded(data[j+1] * vctRects[i].height)) + vctRects[i].tl(); ptsFaceElems.push_back(pt); } vctPtsFaceElems.push_back(ptsFaceElems); @@ -354,10 +395,8 @@ GAPI_OCV_KERNEL(GCPULandmPostProc, GLandmPostProc) cntJaw.clear(); for(int j = kNumFaceElems * 2; j < kNumTotal * 2; j += 2) { - cv::Point pt = - cv::Point(toIntRounded(data[j] * vctRects[i].width), - toIntRounded(data[j+1] * vctRects[i].height)) - + vctRects[i].tl(); + cv::Point pt = cv::Point(toIntRounded(data[j] * vctRects[i].width), + toIntRounded(data[j+1] * vctRects[i].height)) + vctRects[i].tl(); cntJaw.push_back(pt); } vctCntJaw.push_back(cntJaw); @@ -368,23 +407,24 @@ GAPI_OCV_KERNEL(GCPULandmPostProc, GLandmPostProc) // This kernel is the facial landmarks detection post-processing for every face // detected before; output is a tuple of vectors of detected face contours and // facial elements contours: +//! [ld_pp_cnts] +//! [kern_m_impl] GAPI_OCV_KERNEL(GCPUGetContours, GGetContours) { - static void run(const std::vector &vctPtsFaceElems, - const std::vector &vctCntJaw, + static void run(const std::vector &vctPtsFaceElems, // 18 landmarks of the facial elements + const std::vector &vctCntJaw, // 17 landmarks of a jaw std::vector &vctElemsContours, std::vector &vctFaceContours) { +//! 
[kern_m_impl] size_t numFaces = vctCntJaw.size(); CV_Assert(numFaces == vctPtsFaceElems.size()); CV_Assert(vctElemsContours.size() == 0ul); CV_Assert(vctFaceContours.size() == 0ul); // vctFaceElemsContours will store all the face elements' contours found - // on an input image, namely 4 elements (two eyes, nose, mouth) - // for every detected face + // in an input image, namely 4 elements (two eyes, nose, mouth) for every detected face: vctElemsContours.reserve(numFaces * 4); - // vctFaceElemsContours will store all the faces' contours found on - // an input image + // vctFaceElemsContours will store all the faces' contours found in an input image: vctFaceContours.reserve(numFaces); Contour cntFace, cntLeftEye, cntRightEye, cntNose, cntMouth; @@ -393,63 +433,47 @@ GAPI_OCV_KERNEL(GCPUGetContours, GGetContours) for (size_t i = 0ul; i < numFaces; i++) { // The face elements contours + // A left eye: - // Approximating the lower eye contour by half-ellipse - // (using eye points) and storing in cntLeftEye: - cntLeftEye = getEyeEllipse(vctPtsFaceElems[i][1], - vctPtsFaceElems[i][0], - config::kNumPointsInHalfEllipse + 3); + // Approximating the lower eye contour by half-ellipse (using eye points) and storing in cntLeftEye: + cntLeftEye = getEyeEllipse(vctPtsFaceElems[i][1], vctPtsFaceElems[i][0]); // Pushing the left eyebrow clock-wise: - cntLeftEye.insert(cntLeftEye.cend(), {vctPtsFaceElems[i][12], - vctPtsFaceElems[i][13], + cntLeftEye.insert(cntLeftEye.cend(), {vctPtsFaceElems[i][12], vctPtsFaceElems[i][13], vctPtsFaceElems[i][14]}); + // A right eye: - // Approximating the lower eye contour by half-ellipse - // (using eye points) and storing in vctRightEye: - cntRightEye = getEyeEllipse(vctPtsFaceElems[i][2], - vctPtsFaceElems[i][3], - config::kNumPointsInHalfEllipse + 3); + // Approximating the lower eye contour by half-ellipse (using eye points) and storing in vctRightEye: + cntRightEye = getEyeEllipse(vctPtsFaceElems[i][2], vctPtsFaceElems[i][3]); // Pushing the right eyebrow clock-wise: - cntRightEye.insert(cntRightEye.cend(), {vctPtsFaceElems[i][15], - vctPtsFaceElems[i][16], + cntRightEye.insert(cntRightEye.cend(), {vctPtsFaceElems[i][15], vctPtsFaceElems[i][16], vctPtsFaceElems[i][17]}); + // A nose: // Storing the nose points clock-wise cntNose.clear(); - cntNose.insert(cntNose.cend(), {vctPtsFaceElems[i][4], - vctPtsFaceElems[i][7], - vctPtsFaceElems[i][5], - vctPtsFaceElems[i][6]}); + cntNose.insert(cntNose.cend(), {vctPtsFaceElems[i][4], vctPtsFaceElems[i][7], + vctPtsFaceElems[i][5], vctPtsFaceElems[i][6]}); + // A mouth: - // Approximating the mouth contour by two half-ellipses - // (using mouth points) and storing in vctMouth: - cntMouth = getPatchedEllipse(vctPtsFaceElems[i][8], - vctPtsFaceElems[i][9], - vctPtsFaceElems[i][10], - vctPtsFaceElems[i][11]); + // Approximating the mouth contour by two half-ellipses (using mouth points) and storing in vctMouth: + cntMouth = getPatchedEllipse(vctPtsFaceElems[i][8], vctPtsFaceElems[i][9], + vctPtsFaceElems[i][10], vctPtsFaceElems[i][11]); + // Storing all the elements in a vector: - vctElemsContours.insert(vctElemsContours.cend(), {cntLeftEye, - cntRightEye, - cntNose, - cntMouth}); + vctElemsContours.insert(vctElemsContours.cend(), {cntLeftEye, cntRightEye, cntNose, cntMouth}); // The face contour: - // Approximating the forehead contour by half-ellipse - // (using jaw points) and storing in vctFace: - cntFace = getForeheadEllipse(vctCntJaw[i][0], vctCntJaw[i][16], - vctCntJaw[i][8], - config::kNumPointsInHalfEllipse + - 
vctCntJaw[i].size()); - // The ellipse is drawn clock-wise, but jaw contour points goes - // vice versa, so it's necessary to push cntJaw from the end - // to the begin using a reverse iterator: - std::copy(vctCntJaw[i].crbegin(), vctCntJaw[i].crend(), - std::back_inserter(cntFace)); + // Approximating the forehead contour by half-ellipse (using jaw points) and storing in vctFace: + cntFace = getForeheadEllipse(vctCntJaw[i][0], vctCntJaw[i][16], vctCntJaw[i][8]); + // The ellipse is drawn clock-wise, but jaw contour points goes vice versa, so it's necessary to push + // cntJaw from the end to the begin using a reverse iterator: + std::copy(vctCntJaw[i].crbegin(), vctCntJaw[i].crend(), std::back_inserter(cntFace)); // Storing the face contour in another vector: vctFaceContours.push_back(cntFace); } } }; +//! [ld_pp_cnts] // GAPI subgraph functions inline cv::GMat unsharpMask(const cv::GMat &src, @@ -463,27 +487,26 @@ inline cv::GMat mask3C(const cv::GMat &src, // Functions implementation: // Returns an angle (in degrees) between a line given by two Points and // the horison. Note that the result depends on the arguments order: -inline int custom::getLineInclinationAngleDegrees(const cv::Point &ptLeft, - const cv::Point &ptRight) +//! [ld_pp_incl] +inline int custom::getLineInclinationAngleDegrees(const cv::Point &ptLeft, const cv::Point &ptRight) { const cv::Point residual = ptRight - ptLeft; if (residual.y == 0 && residual.x == 0) return 0; else - return toIntRounded(atan2(toDouble(residual.y), toDouble(residual.x)) - * 180.0 / M_PI); + return toIntRounded(atan2(toDouble(residual.y), toDouble(residual.x)) * 180.0 / CV_PI); } +//! [ld_pp_incl] // Approximates a forehead by half-ellipse using jaw points and some geometry // and then returns points of the contour; "capacity" is used to reserve enough // memory as there will be other points inserted. +//! [ld_pp_fhd] inline Contour custom::getForeheadEllipse(const cv::Point &ptJawLeft, const cv::Point &ptJawRight, - const cv::Point &ptJawLower, - const size_t capacity = 0) + const cv::Point &ptJawLower) { Contour cntForehead; - cntForehead.reserve(std::max(capacity, config::kNumPointsInHalfEllipse)); // The point amid the top two points of a jaw: const cv::Point ptFaceCenter((ptJawLeft + ptJawRight) / 2); // This will be the center of the ellipse. @@ -505,21 +528,18 @@ inline Contour custom::getForeheadEllipse(const cv::Point &ptJawLeft, // We need the upper part of an ellipse: static constexpr int kAngForeheadStart = 180; static constexpr int kAngForeheadEnd = 360; - cv::ellipse2Poly(ptFaceCenter, cv::Size(axisX, axisY), angFace, - kAngForeheadStart, kAngForeheadEnd, config::kAngDelta, - cntForehead); + cv::ellipse2Poly(ptFaceCenter, cv::Size(axisX, axisY), angFace, kAngForeheadStart, kAngForeheadEnd, + config::kAngDelta, cntForehead); return cntForehead; } +//! [ld_pp_fhd] // Approximates the lower eye contour by half-ellipse using eye points and some -// geometry and then returns points of the contour; "capacity" is used -// to reserve enough memory as there will be other points inserted. -inline Contour custom::getEyeEllipse(const cv::Point &ptLeft, - const cv::Point &ptRight, - const size_t capacity = 0) +// geometry and then returns points of the contour. +//! 
[ld_pp_eye] +inline Contour custom::getEyeEllipse(const cv::Point &ptLeft, const cv::Point &ptRight) { Contour cntEyeBottom; - cntEyeBottom.reserve(std::max(capacity, config::kNumPointsInHalfEllipse)); const cv::Point ptEyeCenter((ptRight + ptLeft) / 2); const int angle = getLineInclinationAngleDegrees(ptLeft, ptRight); const int axisX = toIntRounded(cv::norm(ptRight - ptLeft) / 2.0); @@ -529,10 +549,11 @@ inline Contour custom::getEyeEllipse(const cv::Point &ptLeft, // We need the lower part of an ellipse: static constexpr int kAngEyeStart = 0; static constexpr int kAngEyeEnd = 180; - cv::ellipse2Poly(ptEyeCenter, cv::Size(axisX, axisY), angle, kAngEyeStart, - kAngEyeEnd, config::kAngDelta, cntEyeBottom); + cv::ellipse2Poly(ptEyeCenter, cv::Size(axisX, axisY), angle, kAngEyeStart, kAngEyeEnd, config::kAngDelta, + cntEyeBottom); return cntEyeBottom; } +//! [ld_pp_eye] //This function approximates an object (a mouth) by two half-ellipses using // 4 points of the axes' ends and then returns points of the contour: @@ -552,8 +573,7 @@ inline Contour custom::getPatchedEllipse(const cv::Point &ptLeft, // We need the upper part of an ellipse: static constexpr int angTopStart = 180; static constexpr int angTopEnd = 360; - cv::ellipse2Poly(ptMouthCenter, cv::Size(axisX, axisYTop), angMouth, - angTopStart, angTopEnd, config::kAngDelta, cntMouthTop); + cv::ellipse2Poly(ptMouthCenter, cv::Size(axisX, axisYTop), angMouth, angTopStart, angTopEnd, config::kAngDelta, cntMouthTop); // The bottom half-ellipse: Contour cntMouth; @@ -561,16 +581,14 @@ inline Contour custom::getPatchedEllipse(const cv::Point &ptLeft, // We need the lower part of an ellipse: static constexpr int angBotStart = 0; static constexpr int angBotEnd = 180; - cv::ellipse2Poly(ptMouthCenter, cv::Size(axisX, axisYBot), angMouth, - angBotStart, angBotEnd, config::kAngDelta, cntMouth); + cv::ellipse2Poly(ptMouthCenter, cv::Size(axisX, axisYBot), angMouth, angBotStart, angBotEnd, config::kAngDelta, cntMouth); // Pushing the upper part to vctOut - cntMouth.reserve(cntMouth.size() + cntMouthTop.size()); - std::copy(cntMouthTop.cbegin(), cntMouthTop.cend(), - std::back_inserter(cntMouth)); + std::copy(cntMouthTop.cbegin(), cntMouthTop.cend(), std::back_inserter(cntMouth)); return cntMouth; } +//! [unsh] inline cv::GMat custom::unsharpMask(const cv::GMat &src, const int sigma, const float strength) @@ -579,6 +597,7 @@ inline cv::GMat custom::unsharpMask(const cv::GMat &src, cv::GMat laplacian = custom::GLaplacian::on(blurred, CV_8U); return (src - (laplacian * strength)); } +//! [unsh] inline cv::GMat custom::mask3C(const cv::GMat &src, const cv::GMat &mask) @@ -593,30 +612,17 @@ inline cv::GMat custom::mask3C(const cv::GMat &src, int main(int argc, char** argv) { - cv::CommandLineParser parser(argc, argv, -"{ help h || print the help message. }" - -"{ facepath f || a path to a Face detection model file (.xml).}" -"{ facedevice |GPU| the face detection computation device.}" - -"{ landmpath l || a path to a Landmarks detection model file (.xml).}" -"{ landmdevice |CPU| the landmarks detection computation device.}" + cv::namedWindow(config::kWinFaceBeautification, cv::WINDOW_NORMAL); + cv::namedWindow(config::kWinInput, cv::WINDOW_NORMAL); -"{ input i || a path to an input. 
Skip to capture from a camera.}" -"{ boxes b |false| set true to draw face Boxes in the \"Input\" window.}" -"{ landmarks m |false| set true to draw landMarks in the \"Input\" window.}" - ); - parser.about("Use this script to run the face beautification" - " algorithm on G-API."); + cv::CommandLineParser parser(argc, argv, config::kParserOptions); + parser.about(config::kParserAbout); if (argc == 1 || parser.has("help")) { parser.printMessage(); return 0; } - cv::namedWindow(config::kWinFaceBeautification, cv::WINDOW_NORMAL); - cv::namedWindow(config::kWinInput, cv::WINDOW_NORMAL); - // Parsing input arguments const std::string faceXmlPath = parser.get("facepath"); const std::string faceBinPath = getWeightsPath(faceXmlPath); @@ -626,59 +632,47 @@ int main(int argc, char** argv) const std::string landmBinPath = getWeightsPath(landmXmlPath); const std::string landmDevice = parser.get("landmdevice"); - // The flags for drawing/not drawing face boxes or/and landmarks in the - // \"Input\" window: - const bool flgBoxes = parser.get("boxes"); - const bool flgLandmarks = parser.get("landmarks"); - // To provide this opportunity, it is necessary to check the flags when - // compiling a graph - // Declaring a graph - // Streaming-API version of a pipeline expression with a lambda-based + // The version of a pipeline expression with a lambda-based // constructor is used to keep all temporary objects in a dedicated scope. +//! [ppl] cv::GComputation pipeline([=]() { - cv::GMat gimgIn; - // Infering +//! [net_usg_fd] + cv::GMat gimgIn; // input + cv::GMat faceOut = cv::gapi::infer(gimgIn); - GArrayROI garRects = custom::GFacePostProc::on(faceOut, gimgIn, - config::kConfThresh); - cv::GArray garElems; - cv::GArray garJaws; - cv::GArray landmOut = cv::gapi::infer( - garRects, gimgIn); - std::tie(garElems, garJaws) = custom::GLandmPostProc::on(landmOut, - garRects); - cv::GArray garElsConts; - cv::GArray garFaceConts; - std::tie(garElsConts, garFaceConts) = custom::GGetContours::on(garElems, - garJaws); - // Masks drawing - // All masks are created as CV_8UC1 - cv::GMat mskSharp = custom::GFillPolyGContours::on(gimgIn, - garElsConts); - cv::GMat mskSharpG = cv::gapi::gaussianBlur(mskSharp, - config::kGKernelSize, - config::kGSigma); - cv::GMat mskBlur = custom::GFillPolyGContours::on(gimgIn, - garFaceConts); - cv::GMat mskBlurG = cv::gapi::gaussianBlur(mskBlur, - config::kGKernelSize, - config::kGSigma); - // The first argument in mask() is Blur as we want to subtract from - // BlurG the next step: - cv::GMat mskBlurFinal = mskBlurG - cv::gapi::mask(mskBlurG, - mskSharpG); - cv::GMat mskFacesGaussed = mskBlurFinal + mskSharpG; - cv::GMat mskFacesWhite = cv::gapi::threshold(mskFacesGaussed, 0, 255, - cv::THRESH_BINARY); - cv::GMat mskNoFaces = cv::gapi::bitwise_not(mskFacesWhite); - cv::GMat gimgBilat = custom::GBilatFilter::on(gimgIn, - config::kBSize, - config::kBSigmaCol, - config::kBSigmaSp); - cv::GMat gimgSharp = custom::unsharpMask(gimgIn, - config::kUnshSigma, +//! [net_usg_fd] + GArrayROI garRects = custom::GFacePostProc::on(faceOut, gimgIn, config::kConfThresh); // post-proc + +//! [net_usg_ld] + cv::GArray landmOut = cv::gapi::infer(garRects, gimgIn); +//! 
[net_usg_ld] + cv::GArray garElems; // | + cv::GArray garJaws; // |output arrays + std::tie(garElems, garJaws) = custom::GLandmPostProc::on(landmOut, garRects); // post-proc + cv::GArray garElsConts; // face elements + cv::GArray garFaceConts; // whole faces + std::tie(garElsConts, garFaceConts) = custom::GGetContours::on(garElems, garJaws); // interpolation + +//! [msk_ppline] + cv::GMat mskSharp = custom::GFillPolyGContours::on(gimgIn, garElsConts); // | + cv::GMat mskSharpG = cv::gapi::gaussianBlur(mskSharp, config::kGKernelSize, // | + config::kGSigma); // | + cv::GMat mskBlur = custom::GFillPolyGContours::on(gimgIn, garFaceConts); // | + cv::GMat mskBlurG = cv::gapi::gaussianBlur(mskBlur, config::kGKernelSize, // | + config::kGSigma); // |draw masks + // The first argument in mask() is Blur as we want to subtract from // | + // BlurG the next step: // | + cv::GMat mskBlurFinal = mskBlurG - cv::gapi::mask(mskBlurG, mskSharpG); // | + cv::GMat mskFacesGaussed = mskBlurFinal + mskSharpG; // | + cv::GMat mskFacesWhite = cv::gapi::threshold(mskFacesGaussed, 0, 255, cv::THRESH_BINARY); // | + cv::GMat mskNoFaces = cv::gapi::bitwise_not(mskFacesWhite); // | +//! [msk_ppline] + + cv::GMat gimgBilat = custom::GBilatFilter::on(gimgIn, config::kBSize, + config::kBSigmaCol, config::kBSigmaSp); + cv::GMat gimgSharp = custom::unsharpMask(gimgIn, config::kUnshSigma, config::kUnshStrength); // Applying the masks // Custom function mask3C() should be used instead of just gapi::mask() @@ -686,54 +680,34 @@ int main(int argc, char** argv) cv::GMat gimgBilatMasked = custom::mask3C(gimgBilat, mskBlurFinal); cv::GMat gimgSharpMasked = custom::mask3C(gimgSharp, mskSharpG); cv::GMat gimgInMasked = custom::mask3C(gimgIn, mskNoFaces); - cv::GMat gimgBeautif = gimgBilatMasked + gimgSharpMasked + - gimgInMasked; - // Drawing face boxes and landmarks if necessary: - cv::GMat gimgTemp; - if (flgLandmarks == true) - { - cv::GMat gimgTemp2 = custom::GPolyLines::on(gimgIn, garFaceConts, - config::kClosedLine, - config::kClrYellow); - gimgTemp = custom::GPolyLines::on(gimgTemp2, garElsConts, - config::kClosedLine, - config::kClrYellow); - } - else - { - gimgTemp = gimgIn; - } - cv::GMat gimgShow; - if (flgBoxes == true) - { - gimgShow = custom::GRectangle::on(gimgTemp, garRects, - config::kClrGreen); - } - else - { - // This action is necessary because an output node must be a result of - // some operations applied to an input node, so it handles the case - // when it should be nothing to draw - gimgShow = cv::gapi::copy(gimgTemp); - } - return cv::GComputation(cv::GIn(gimgIn), - cv::GOut(gimgBeautif, gimgShow)); + cv::GMat gimgBeautif = gimgBilatMasked + gimgSharpMasked + gimgInMasked; + return cv::GComputation(cv::GIn(gimgIn), cv::GOut(gimgBeautif, + cv::gapi::copy(gimgIn), + garFaceConts, + garElsConts, + garRects)); }); +//! [ppl] // Declaring IE params for networks +//! [net_param] auto faceParams = cv::gapi::ie::Params { - faceXmlPath, - faceBinPath, - faceDevice + /*std::string*/ faceXmlPath, + /*std::string*/ faceBinPath, + /*std::string*/ faceDevice }; auto landmParams = cv::gapi::ie::Params { - landmXmlPath, - landmBinPath, - landmDevice + /*std::string*/ landmXmlPath, + /*std::string*/ landmBinPath, + /*std::string*/ landmDevice }; +//! [net_param] +//! [netw] auto networks = cv::gapi::networks(faceParams, landmParams); +//! [netw] // Declaring custom and fluid kernels have been used: +//! 
[kern_pass_1] auto customKernels = cv::gapi::kernels(); auto kernels = cv::gapi::combine(cv::gapi::core::fluid::kernels(), customKernels); +//! [kern_pass_1] + + Avg avg; + size_t frames = 0; + + // The flags for drawing/not drawing face boxes or/and landmarks in the + // \"Input\" window: + const bool flgBoxes = parser.get("boxes"); + const bool flgLandmarks = parser.get("landmarks"); + // The flag to involve stream pipelining: + const bool flgStreaming = parser.get("streaming"); + // The flag to display the output images or not: + const bool flgPerformance = parser.get("performance"); // Now we are ready to compile the pipeline to a stream with specified // kernels, networks and image format expected to process - auto stream = pipeline.compileStreaming(cv::GMatDesc{CV_8U,3, - cv::Size(1280,720)}, - cv::compile_args(kernels, - networks)); - // Setting the source for the stream: - if (parser.has("input")) + if (flgStreaming == true) { - stream.setSource(cv::gapi::wip::make_src - (parser.get("input"))); - } - else - { - stream.setSource(cv::gapi::wip::make_src - (0)); +//! [str_comp] + cv::GStreamingCompiled stream = pipeline.compileStreaming(cv::compile_args(kernels, networks)); +//! [str_comp] + // Setting the source for the stream: +//! [str_src] + if (parser.has("input")) + { + stream.setSource(cv::gapi::wip::make_src(parser.get("input"))); + } +//! [str_src] + else + { + stream.setSource(cv::gapi::wip::make_src(0)); + } + // Declaring output variables + // Streaming: + cv::Mat imgShow; + cv::Mat imgBeautif; + std::vector vctFaceConts, vctElsConts; + VectorROI vctRects; + if (flgPerformance == true) + { + auto out_vector = cv::gout(imgBeautif, imgShow, vctFaceConts, + vctElsConts, vctRects); + stream.start(); + avg.start(); + while (stream.running()) + { + stream.pull(std::move(out_vector)); + frames++; + } + } + else // flgPerformance == false + { +//! [str_loop] + auto out_vector = cv::gout(imgBeautif, imgShow, vctFaceConts, + vctElsConts, vctRects); + stream.start(); + avg.start(); + while (stream.running()) + { + if (!stream.try_pull(std::move(out_vector))) + { + // Use a try_pull() to obtain data. + // If there's no data, let UI refresh (and handle keypress) + if (cv::waitKey(1) >= 0) break; + else continue; + } + frames++; + // Drawing face boxes and landmarks if necessary: + if (flgLandmarks == true) + { + cv::polylines(imgShow, vctFaceConts, config::kClosedLine, + config::kClrYellow); + cv::polylines(imgShow, vctElsConts, config::kClosedLine, + config::kClrYellow); + } + if (flgBoxes == true) + for (auto rect : vctRects) + cv::rectangle(imgShow, rect, config::kClrGreen); + cv::imshow(config::kWinInput, imgShow); + cv::imshow(config::kWinFaceBeautification, imgBeautif); + } +//! [str_loop] + } + std::cout << "Processed " << frames << " frames in " << avg.elapsed() + << " (" << avg.fps(frames) << " FPS)" << std::endl; } - // Declaring output variables - cv::Mat imgShow; - cv::Mat imgBeautif; - // Streaming: - stream.start(); - while (stream.running()) + else // serial mode: { - auto out_vector = cv::gout(imgBeautif, imgShow); - if (!stream.try_pull(std::move(out_vector))) +//! [bef_cap] +#include + cv::GCompiled cc; + cv::VideoCapture cap; + if (parser.has("input")) + { + cap.open(parser.get("input")); + } +//! [bef_cap] + else if (!cap.open(0)) + { + std::cout << "No input available" << std::endl; + return 1; + } + if (flgPerformance == true) { - // Use a try_pull() to obtain data. 
- // If there's no data, let UI refresh (and handle keypress) - if (cv::waitKey(1) >= 0) break; - else continue; + while (true) + { + cv::Mat img; + cv::Mat imgShow; + cv::Mat imgBeautif; + std::vector vctFaceConts, vctElsConts; + VectorROI vctRects; + cap >> img; + if (img.empty()) + { + break; + } + frames++; + if (!cc) + { + cc = pipeline.compile(cv::descr_of(img), cv::compile_args(kernels, networks)); + avg.start(); + } + cc(cv::gin(img), cv::gout(imgBeautif, imgShow, vctFaceConts, + vctElsConts, vctRects)); + } } - cv::imshow(config::kWinInput, imgShow); - cv::imshow(config::kWinFaceBeautification, imgBeautif); + else // flgPerformance == false + { +//! [bef_loop] + while (cv::waitKey(1) < 0) + { + cv::Mat img; + cv::Mat imgShow; + cv::Mat imgBeautif; + std::vector vctFaceConts, vctElsConts; + VectorROI vctRects; + cap >> img; + if (img.empty()) + { + cv::waitKey(); + break; + } + frames++; +//! [apply] + pipeline.apply(cv::gin(img), cv::gout(imgBeautif, imgShow, + vctFaceConts, + vctElsConts, vctRects), + cv::compile_args(kernels, networks)); +//! [apply] + if (frames == 1) + { + // Start timer only after 1st frame processed -- compilation + // happens on-the-fly here + avg.start(); + } + // Drawing face boxes and landmarks if necessary: + if (flgLandmarks == true) + { + cv::polylines(imgShow, vctFaceConts, config::kClosedLine, + config::kClrYellow); + cv::polylines(imgShow, vctElsConts, config::kClosedLine, + config::kClrYellow); + } + if (flgBoxes == true) + for (auto rect : vctRects) + cv::rectangle(imgShow, rect, config::kClrGreen); + cv::imshow(config::kWinInput, imgShow); + cv::imshow(config::kWinFaceBeautification, imgBeautif); + } + } +//! [bef_loop] + std::cout << "Processed " << frames << " frames in " << avg.elapsed() + << " (" << avg.fps(frames) << " FPS)" << std::endl; } return 0; } +#else +#include +int main() +{ + std::cerr << "This tutorial code requires G-API module " + "with Inference Engine backend to run" + << std::endl; + return 1; +} +#endif // HAVE_OPECV_GAPI