Add letter box 31/284031/2
authorKwanghoon Son <k.son@samsung.com>
Wed, 2 Nov 2022 10:57:39 +0000 (06:57 -0400)
committerTae-Young Chung <ty83.chung@samsung.com>
Wed, 9 Nov 2022 03:01:04 +0000 (03:01 +0000)
[Issue type] New feature

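The SNP YOLO model requires letterbox preprocessing.
Add a Resizer option (SQUARE or LETTERBOX), parse it from the "resize" member of the preprocess metadata, and apply it in PreProcess::Resize.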

Change-Id: I1a02f5a1823aa3d017cd8ba33dfcfa5558d9a592
Signed-off-by: Kwanghoon Son <k.son@samsung.com>
mv_machine_learning/inference/include/InputMetadata.h
mv_machine_learning/inference/include/PreProcess.h
mv_machine_learning/inference/src/InputMetadata.cpp
mv_machine_learning/inference/src/PreProcess.cpp

index f1c762c4145e3663cb07d1b3b0c023ca0441efcf..335b0b2cf3c7aff77a4be2273a02790abeaab78e 100644 (file)
@@ -49,10 +49,17 @@ struct Quantization
        std::vector<double> zeropoint;
 };
 
+enum class Resizer // Selects how PreProcess::Resize fits the source image into the requested size.
+{
+       SQUARE, // Plain cv::resize straight to the target size; aspect ratio is not preserved.
+       LETTERBOX, // Aspect-preserving scale (capped at 1.0, so never enlarged), centered on a target-size canvas filled with 114.
+};
+
 struct Options
 {
        Normalization normalization;
        Quantization quantization;
+       Resizer resizer = Resizer::SQUARE; // Stays SQUARE unless the preprocess "resize" member is "LETTERBOX".
 };
 
 struct LayerInfo
index de769263ecfeec5b855329e7a0661b680f0e9814..bdb63768d89bb3525a2ed857aebaf84d3f73a522 100644 (file)
@@ -63,7 +63,7 @@ public:
                        const Options &options, void *buffer);
 
 private:
-       int Resize(cv::Mat &source, cv::Mat &dest, cv::Size size);
+       int Resize(cv::Mat &source, cv::Mat &dest, cv::Size size, Resizer resizer = Resizer::SQUARE);
        int ColorConvert(cv::Mat &source, cv::Mat &dest, int sType, int dType);
        int Normalize(cv::Mat &source, cv::Mat &dest, const std::vector<double> &mean, const std::vector<double> &std);
        int Quantize(cv::Mat &source, cv::Mat &dest, const std::vector<double> &scale,
index a2bbceb4598d73ce1c6f97443a396b09ba263356..bda1ec47ceb657df88404dbaf53d2fe29259d3a3 100644 (file)
@@ -174,6 +174,16 @@ int InputMetadata::GetPreProcess(JsonObject *root)
                option.insert(std::make_pair(iterLayer->first, opt));
        }
 
+       if (json_object_has_member(preprocess_object, "resize")) {
+               const char *resizer = static_cast<const char *>(json_object_get_string_member(preprocess_object, "resize"));
+               if (strcmp(resizer, "LETTERBOX") == 0) {
+                       opt.resizer = Resizer::LETTERBOX;
+                       LOGI("resizer changed to letterbox");
+               }
+       }
+
+       option.insert(std::make_pair(iterLayer->first, opt));
+
        LOGI("LEAVE");
 
        return MEDIA_VISION_ERROR_NONE;
index 475753bfe7afdea72a5ac556c7c23b5454153365..b0a17e9a3f4092b5ca909825b0217bb62c385666 100644 (file)
@@ -42,12 +42,28 @@ namespace mediavision
 {
 namespace inference
 {
-int PreProcess::Resize(cv::Mat &source, cv::Mat &dest, cv::Size size)
+int PreProcess::Resize(cv::Mat &source, cv::Mat &dest, cv::Size size, Resizer resizer)
 {
        LOGI("ENTER");
 
        try {
-               cv::resize(source, dest, size);
+               if (source.size() == size) {
+                       dest = source;
+               } else {
+                       if (resizer == Resizer::LETTERBOX) {
+                               double srcW = static_cast<double>(source.size().width);
+                               double srcH = static_cast<double>(source.size().height);
+                               double scale = std::min(1.0, std::min(size.width / srcW, size.height / srcH));
+                               int dstW = static_cast<int>(srcW * scale);
+                               int dstH = static_cast<int>(srcH * scale);
+                               cv::Mat _dest;
+                               cv::resize(source, _dest, cv::Size(dstW, dstH));
+                               dest = cv::Mat(size, source.type(), cv::Scalar(114, 114, 114));
+                               _dest.copyTo(dest(cv::Rect((size.width - dstW) / 2, (size.height - dstH) / 2, dstW, dstH)));
+                       } else {
+                               cv::resize(source, dest, size);
+                       }
+               }
        } catch (cv::Exception &e) {
                LOGE("Fail to resize with msg: %s", e.what());
                return MEDIA_VISION_ERROR_INVALID_OPERATION;
@@ -126,7 +142,7 @@ int PreProcess::Run(cv::Mat &source, const int colorSpace, const int dataType, c
 
        cv::Mat cvSource, cvDest;
        // cvSource gets a new allocation with dest.size(), unless source already has that size, in which case it shares the data with source
-       Resize(source, cvSource, dest.size());
+       Resize(source, cvSource, dest.size(), options.resizer);
 
        // cvDest has a new allocation if its colorSpace is not RGB888
        // cvDest shares the data with cvSource if its colorSpace is RGB888