From 4753206783b2d266a89042d0935cc1a22d2c8c4d Mon Sep 17 00:00:00 2001 From: Dmitry Budnikov Date: Tue, 18 May 2021 13:58:08 +0300 Subject: [PATCH] Merge pull request #20065 from dbudniko:dbudniko/G-API_mtcnn_demo_PR_hotfix2 G-API MTCNN demo hotfix to align overall pipeline accuracy with the reference Python code output. * MTCNN G-API demo aligned with Python from OMZ * clean up * more comments from Maxim are addressed. * address comment from Dmitry --- modules/gapi/samples/face_detection.cpp | 179 +++++++++++++----------- 1 file changed, 94 insertions(+), 85 deletions(-) diff --git a/modules/gapi/samples/face_detection.cpp b/modules/gapi/samples/face_detection.cpp index 73115284ca..56f3f18a13 100644 --- a/modules/gapi/samples/face_detection.cpp +++ b/modules/gapi/samples/face_detection.cpp @@ -56,45 +56,45 @@ namespace { #define NUM_PTS 5 struct BBox { - double x1; - double y1; - double x2; - double y2; + int x1; + int y1; + int x2; + int y2; - cv::Rect getRect() const { return cv::Rect(static_cast(x1), - static_cast(y1), - static_cast(x2 - x1), - static_cast(y2 - y1)); } + cv::Rect getRect() const { return cv::Rect(x1, + y1, + x2 - x1, + y2 - y1); } BBox getSquare() const { BBox bbox; - double bboxWidth = x2 - x1; - double bboxHeight = y2 - y1; - double side = std::max(bboxWidth, bboxHeight); - bbox.x1 = static_cast(x1) + (bboxWidth - side) * 0.5; - bbox.y1 = static_cast(y1) + (bboxHeight - side) * 0.5; - bbox.x2 = bbox.x1 + side; - bbox.y2 = bbox.y1 + side; + float bboxWidth = static_cast(x2 - x1); + float bboxHeight = static_cast(y2 - y1); + float side = std::max(bboxWidth, bboxHeight); + bbox.x1 = static_cast(static_cast(x1) + (bboxWidth - side) * 0.5f); + bbox.y1 = static_cast(static_cast(y1) + (bboxHeight - side) * 0.5f); + bbox.x2 = static_cast(static_cast(bbox.x1) + side); + bbox.y2 = static_cast(static_cast(bbox.y1) + side); return bbox; } }; struct Face { BBox bbox; - double score; - std::array regression; - double ptsCoords[2 * NUM_PTS]; + float score; + std::array regression; + std::array ptsCoords; static void applyRegression(std::vector& faces, bool addOne = false) { for (auto& face : faces) { - double bboxWidth = - face.bbox.x2 - face.bbox.x1 + static_cast(addOne); - double bboxHeight = - face.bbox.y2 - face.bbox.y1 + static_cast(addOne); - face.bbox.x1 = face.bbox.x1 + static_cast(face.regression[1]) * bboxWidth; - face.bbox.y1 = face.bbox.y1 + static_cast(face.regression[0]) * bboxHeight; - face.bbox.x2 = face.bbox.x2 + static_cast(face.regression[3]) * bboxWidth; - face.bbox.y2 = face.bbox.y2 + static_cast(face.regression[2]) * bboxHeight; + float bboxWidth = + face.bbox.x2 - face.bbox.x1 + static_cast(addOne); + float bboxHeight = + face.bbox.y2 - face.bbox.y1 + static_cast(addOne); + face.bbox.x1 = static_cast(static_cast(face.bbox.x1) + (face.regression[1] * bboxWidth)); + face.bbox.y1 = static_cast(static_cast(face.bbox.y1) + (face.regression[0] * bboxHeight)); + face.bbox.x2 = static_cast(static_cast(face.bbox.x2) + (face.regression[3] * bboxWidth)); + face.bbox.y2 = static_cast(static_cast(face.bbox.y2) + (face.regression[2] * bboxHeight)); } } @@ -104,7 +104,7 @@ struct Face { } } - static std::vector runNMS(std::vector& faces, const double threshold, + static std::vector runNMS(std::vector& faces, const float threshold, const bool useMin = false) { std::vector facesNMS; if (faces.empty()) { @@ -123,22 +123,22 @@ struct Face { facesNMS.push_back(faces[idx]); std::vector tmpIndices = indices; indices.clear(); - const double area1 = (faces[idx].bbox.x2 - faces[idx].bbox.x1 + 1) * - (faces[idx].bbox.y2 - faces[idx].bbox.y1 + 1); + const float area1 = static_cast(faces[idx].bbox.x2 - faces[idx].bbox.x1 + 1) * + static_cast(faces[idx].bbox.y2 - faces[idx].bbox.y1 + 1); for (size_t i = 1; i < tmpIndices.size(); ++i) { int tmpIdx = tmpIndices[i]; - const double interX1 = std::max(faces[idx].bbox.x1, faces[tmpIdx].bbox.x1); - const double interY1 = std::max(faces[idx].bbox.y1, faces[tmpIdx].bbox.y1); - const double interX2 = std::min(faces[idx].bbox.x2, faces[tmpIdx].bbox.x2); - const double interY2 = std::min(faces[idx].bbox.y2, faces[tmpIdx].bbox.y2); - - const double bboxWidth = std::max(0.0, (interX2 - interX1 + 1)); - const double bboxHeight = std::max(0.0, (interY2 - interY1 + 1)); - - const double interArea = bboxWidth * bboxHeight; - const double area2 = (faces[tmpIdx].bbox.x2 - faces[tmpIdx].bbox.x1 + 1) * - (faces[tmpIdx].bbox.y2 - faces[tmpIdx].bbox.y1 + 1); - double overlap = 0.0; + const float interX1 = static_cast(std::max(faces[idx].bbox.x1, faces[tmpIdx].bbox.x1)); + const float interY1 = static_cast(std::max(faces[idx].bbox.y1, faces[tmpIdx].bbox.y1)); + const float interX2 = static_cast(std::min(faces[idx].bbox.x2, faces[tmpIdx].bbox.x2)); + const float interY2 = static_cast(std::min(faces[idx].bbox.y2, faces[tmpIdx].bbox.y2)); + + const float bboxWidth = std::max(0.0f, (interX2 - interX1 + 1)); + const float bboxHeight = std::max(0.0f, (interY2 - interY1 + 1)); + + const float interArea = bboxWidth * bboxHeight; + const float area2 = static_cast(faces[tmpIdx].bbox.x2 - faces[tmpIdx].bbox.x1 + 1) * + static_cast(faces[tmpIdx].bbox.y2 - faces[tmpIdx].bbox.y1 + 1); + float overlap = 0.0; if (useMin) { overlap = interArea / std::min(area1, area2); } else { @@ -153,13 +153,12 @@ struct Face { } }; -const double P_NET_WINDOW_SIZE = 12.0; -const double P_NET_STRIDE = 2.0; +const float P_NET_WINDOW_SIZE = 12.0f; std::vector buildFaces(const cv::Mat& scores, const cv::Mat& regressions, - const double scaleFactor, - const double threshold) { + const float scaleFactor, + const float threshold) { auto w = scores.size[3]; auto h = scores.size[2]; @@ -170,20 +169,28 @@ std::vector buildFaces(const cv::Mat& scores, const float* reg_data = regressions.ptr(); + auto out_side = std::max(h, w); + auto in_side = 2 * out_side + 11; + float stride = 0.0f; + if (out_side != 1) + { + stride = static_cast(in_side - P_NET_WINDOW_SIZE) / static_cast(out_side - 1); + } + std::vector boxes; for (int i = 0; i < size; i++) { if (scores_data[i] >= (threshold)) { - int y = i / w; - int x = i - w * y; + float y = static_cast(i / w); + float x = static_cast(i - w * y); Face faceInfo; BBox& faceBox = faceInfo.bbox; - faceBox.x1 = (static_cast(x) * P_NET_STRIDE) / scaleFactor; - faceBox.y1 = (static_cast(y) * P_NET_STRIDE) / scaleFactor; - faceBox.x2 = (static_cast(x) * P_NET_STRIDE + P_NET_WINDOW_SIZE - 1.f) / scaleFactor; - faceBox.y2 = (static_cast(y) * P_NET_STRIDE + P_NET_WINDOW_SIZE - 1.f) / scaleFactor; + faceBox.x1 = std::max(0, static_cast((x * stride) / scaleFactor)); + faceBox.y1 = std::max(0, static_cast((y * stride) / scaleFactor)); + faceBox.x2 = static_cast((x * stride + P_NET_WINDOW_SIZE - 1.0f) / scaleFactor); + faceBox.y2 = static_cast((y * stride + P_NET_WINDOW_SIZE - 1.0f) / scaleFactor); faceInfo.regression[0] = reg_data[i]; faceInfo.regression[1] = reg_data[i + size]; faceInfo.regression[2] = reg_data[i + 2 * size]; @@ -213,21 +220,21 @@ G_API_NET(MTCNNOutput, using GFaces = cv::GArray; G_API_OP(BuildFaces, - , + , "sample.custom.mtcnn.build_faces") { static cv::GArrayDesc outMeta(const cv::GMatDesc&, const cv::GMatDesc&, - const double, - const double) { + const float, + const float) { return cv::empty_array_desc(); } }; G_API_OP(RunNMS, - , + , "sample.custom.mtcnn.run_nms") { static cv::GArrayDesc outMeta(const cv::GArrayDesc&, - const double, const bool) { + const float, const bool) { return cv::empty_array_desc(); } }; @@ -267,24 +274,24 @@ G_API_OP(R_O_NetPreProcGetROIs, G_API_OP(RNetPostProc, - , + , "sample.custom.mtcnn.rnet_postproc") { static cv::GArrayDesc outMeta(const cv::GArrayDesc&, const cv::GArrayDesc&, const cv::GArrayDesc&, - const double) { + const float) { return cv::empty_array_desc(); } }; G_API_OP(ONetPostProc, - , + , "sample.custom.mtcnn.onet_postproc") { static cv::GArrayDesc outMeta(const cv::GArrayDesc&, const cv::GArrayDesc&, const cv::GArrayDesc&, const cv::GArrayDesc&, - const double) { + const float) { return cv::empty_array_desc(); } }; @@ -309,8 +316,8 @@ G_API_OP(Transpose, GAPI_OCV_KERNEL(OCVBuildFaces, BuildFaces) { static void run(const cv::Mat & in_scores, const cv::Mat & in_regresssions, - const double scaleFactor, - const double threshold, + const float scaleFactor, + const float threshold, std::vector &out_faces) { out_faces = buildFaces(in_scores, in_regresssions, scaleFactor, threshold); } @@ -318,7 +325,7 @@ GAPI_OCV_KERNEL(OCVBuildFaces, BuildFaces) { GAPI_OCV_KERNEL(OCVRunNMS, RunNMS) { static void run(const std::vector &in_faces, - const double threshold, + const float threshold, const bool useMin, std::vector &out_faces) { std::vector in_faces_copy = in_faces; @@ -375,7 +382,7 @@ GAPI_OCV_KERNEL(OCVRNetPostProc, RNetPostProc) { static void run(const std::vector &in_faces, const std::vector &in_scores, const std::vector &in_regresssions, - const double threshold, + const float threshold, std::vector &out_faces) { out_faces.clear(); for (unsigned int k = 0; k < in_faces.size(); ++k) { @@ -396,7 +403,7 @@ GAPI_OCV_KERNEL(OCVONetPostProc, ONetPostProc) { const std::vector &in_scores, const std::vector &in_regresssions, const std::vector &in_landmarks, - const double threshold, + const float threshold, std::vector &out_faces) { out_faces.clear(); for (unsigned int k = 0; k < in_faces.size(); ++k) { @@ -406,16 +413,16 @@ GAPI_OCV_KERNEL(OCVONetPostProc, ONetPostProc) { if (scores_data[1] >= threshold) { Face info = in_faces[k]; info.score = scores_data[1]; - for (int i = 0; i < 4; ++i) { + for (size_t i = 0; i < 4; ++i) { info.regression[i] = reg_data[i]; } - double w = info.bbox.x2 - info.bbox.x1 + 1.0; - double h = info.bbox.y2 - info.bbox.y1 + 1.0; + float w = info.bbox.x2 - info.bbox.x1 + 1.0f; + float h = info.bbox.y2 - info.bbox.y1 + 1.0f; - for (int p = 0; p < NUM_PTS; ++p) { + for (size_t p = 0; p < NUM_PTS; ++p) { info.ptsCoords[2 * p] = - info.bbox.x1 + static_cast(landmark_data[NUM_PTS + p]) * w - 1; - info.ptsCoords[2 * p + 1] = info.bbox.y1 + static_cast(landmark_data[p]) * h - 1; + info.bbox.x1 + static_cast(landmark_data[NUM_PTS + p]) * w - 1; + info.ptsCoords[2 * p + 1] = info.bbox.y1 + static_cast(landmark_data[p]) * h - 1; } out_faces.push_back(info); @@ -433,7 +440,7 @@ GAPI_OCV_KERNEL(OCVSwapFaces, SwapFaces) { for (size_t i = 0; i < in_faces_copy.size(); ++i) { std::swap(in_faces_copy[i].bbox.x1, in_faces_copy[i].bbox.y1); std::swap(in_faces_copy[i].bbox.x2, in_faces_copy[i].bbox.y2); - for (int p = 0; p < NUM_PTS; ++p) { + for (size_t p = 0; p < NUM_PTS; ++p) { std::swap(in_faces_copy[i].ptsCoords[2 * p], in_faces_copy[i].ptsCoords[2 * p + 1]); } } @@ -573,13 +580,13 @@ int main(int argc, char* argv[]) { const auto input_file_name = cmd.get("input"); const auto model_path_p = cmd.get("mtcnnpm"); const auto target_dev_p = cmd.get("mtcnnpd"); - const auto conf_thresh_p = cmd.get("thrp"); + const auto conf_thresh_p = cmd.get("thrp"); const auto model_path_r = cmd.get("mtcnnrm"); const auto target_dev_r = cmd.get("mtcnnrd"); - const auto conf_thresh_r = cmd.get("thrr"); + const auto conf_thresh_r = cmd.get("thrr"); const auto model_path_o = cmd.get("mtcnnom"); const auto target_dev_o = cmd.get("mtcnnod"); - const auto conf_thresh_o = cmd.get("thro"); + const auto conf_thresh_o = cmd.get("thro"); const auto use_half_scale = cmd.get("half_scale"); std::vector level_size; @@ -613,8 +620,10 @@ int main(int argc, char* argv[]) { in_resized[0] = cv::gapi::resize(in_originalRGB, level_size[0]); in_transposed[0] = custom::Transpose::on(in_resized[0]); std::tie(regressions[0], scores[0]) = run_mtcnn_p(in_transposed[0], get_pnet_level_name(level_size[0])); - cv::GArray faces0 = custom::BuildFaces::on(scores[0], regressions[0], scales[0], conf_thresh_p); - nms_p_faces[0] = custom::RunNMS::on(faces0, 0.5, false); + cv::GArray faces0 = custom::BuildFaces::on(scores[0], regressions[0], static_cast(scales[0]), conf_thresh_p); + cv::GArray final_p_faces_for_bb2squares = custom::ApplyRegression::on(faces0, true); + cv::GArray final_faces_pnet0 = custom::BBoxesToSquares::on(final_p_faces_for_bb2squares); + nms_p_faces[0] = custom::RunNMS::on(final_faces_pnet0, 0.5f, false); total_faces[0] = custom::AccumulatePyramidOutputs::on(faces_init, nms_p_faces[0]); //The rest PNet pyramid layers to accumlate all layers result in total_faces[PYRAMID_LEVELS - 1]] for (int i = 1; i < pyramid_levels; ++i) @@ -622,15 +631,15 @@ int main(int argc, char* argv[]) { in_resized[i] = cv::gapi::resize(in_originalRGB, level_size[i]); in_transposed[i] = custom::Transpose::on(in_resized[i]); std::tie(regressions[i], scores[i]) = run_mtcnn_p(in_transposed[i], get_pnet_level_name(level_size[i])); - cv::GArray faces = custom::BuildFaces::on(scores[i], regressions[i], scales[i], conf_thresh_p); - nms_p_faces[i] = custom::RunNMS::on(faces, 0.5, false); + cv::GArray faces = custom::BuildFaces::on(scores[i], regressions[i], static_cast(scales[i]), conf_thresh_p); + cv::GArray final_p_faces_for_bb2squares_i = custom::ApplyRegression::on(faces, true); + cv::GArray final_faces_pnet_i = custom::BBoxesToSquares::on(final_p_faces_for_bb2squares_i); + nms_p_faces[i] = custom::RunNMS::on(final_faces_pnet_i, 0.5f, false); total_faces[i] = custom::AccumulatePyramidOutputs::on(total_faces[i - 1], nms_p_faces[i]); } //Proposal post-processing - cv::GArray nms07_p_faces_total = custom::RunNMS::on(total_faces[pyramid_levels - 1], 0.7, false); - cv::GArray final_p_faces_for_bb2squares = custom::ApplyRegression::on(nms07_p_faces_total, false); - cv::GArray final_faces_pnet = custom::BBoxesToSquares::on(final_p_faces_for_bb2squares); + cv::GArray final_faces_pnet = custom::RunNMS::on(total_faces[pyramid_levels - 1], 0.7f, true); //Refinement part of MTCNN graph cv::GArray faces_roi_pnet = custom::R_O_NetPreProcGetROIs::on(final_faces_pnet, in_sz); @@ -640,7 +649,7 @@ int main(int argc, char* argv[]) { //Refinement post-processing cv::GArray rnet_post_proc_faces = custom::RNetPostProc::on(final_faces_pnet, scoresRNet, regressionsRNet, conf_thresh_r); - cv::GArray nms07_r_faces_total = custom::RunNMS::on(rnet_post_proc_faces, 0.7, false); + cv::GArray nms07_r_faces_total = custom::RunNMS::on(rnet_post_proc_faces, 0.7f, false); cv::GArray final_r_faces_for_bb2squares = custom::ApplyRegression::on(nms07_r_faces_total, true); cv::GArray final_faces_rnet = custom::BBoxesToSquares::on(final_r_faces_for_bb2squares); @@ -652,7 +661,7 @@ int main(int argc, char* argv[]) { //Output post-processing cv::GArray onet_post_proc_faces = custom::ONetPostProc::on(final_faces_rnet, scoresONet, regressionsONet, landmarksONet, conf_thresh_o); cv::GArray final_o_faces_for_nms07 = custom::ApplyRegression::on(onet_post_proc_faces, true); - cv::GArray nms07_o_faces_total = custom::RunNMS::on(final_o_faces_for_nms07, 0.7, true); + cv::GArray nms07_o_faces_total = custom::RunNMS::on(final_o_faces_for_nms07, 0.7f, true); cv::GArray final_faces_onet = custom::SwapFaces::on(nms07_o_faces_total); cv::GComputation graph_mtcnn(cv::GIn(in_original), cv::GOut(cv::gapi::copy(in_original), final_faces_onet)); @@ -723,7 +732,7 @@ int main(int argc, char* argv[]) { // show the image with faces in it for (const auto& out_face : out_faces) { std::vector pts; - for (int p = 0; p < NUM_PTS; ++p) { + for (size_t p = 0; p < NUM_PTS; ++p) { pts.push_back( cv::Point(static_cast(out_face.ptsCoords[2 * p]), static_cast(out_face.ptsCoords[2 * p + 1]))); } -- 2.34.1