Add letterbox resizing
author Kwanghoon Son <k.son@samsung.com>
Wed, 2 Nov 2022 10:57:39 +0000 (06:57 -0400)
committer Inki Dae <inki.dae@samsung.com>
Mon, 7 Nov 2022 03:12:39 +0000 (12:12 +0900)
[Issue type] New feature

The SNP YOLO model needs letterbox preprocessing.
Define a letterbox resize method and parse the "resize" preprocess option that selects it.

Change-Id: I1a02f5a1823aa3d017cd8ba33dfcfa5558d9a592
Signed-off-by: Kwanghoon Son <k.son@samsung.com>
mv_machine_learning/inference/include/InputMetadata.h
mv_machine_learning/inference/include/PreProcess.h
mv_machine_learning/inference/src/InputMetadata.cpp
mv_machine_learning/inference/src/PreProcess.cpp

index 1622a532120401eccf8ad919e9d9fba63d782a0b..7b7b13f447b75eb464e0eb6d1507a7eca6344b8a 100644 (file)
@@ -49,10 +49,17 @@ struct Quantization
        std::vector<double> zeropoint;
 };
 
+enum class Resizer
+{
+       SQUARE,
+       LETTERBOX,
+};
+
 struct Options
 {
        Normalization normalization;
        Quantization quantization;
+       Resizer resizer = Resizer::SQUARE;
 };
 
 struct InputMetadata
index de769263ecfeec5b855329e7a0661b680f0e9814..bdb63768d89bb3525a2ed857aebaf84d3f73a522 100644 (file)
@@ -63,7 +63,7 @@ public:
                        const Options &options, void *buffer);
 
 private:
-       int Resize(cv::Mat &source, cv::Mat &dest, cv::Size size);
+       int Resize(cv::Mat &source, cv::Mat &dest, cv::Size size, Resizer resizer = Resizer::SQUARE);
        int ColorConvert(cv::Mat &source, cv::Mat &dest, int sType, int dType);
        int Normalize(cv::Mat &source, cv::Mat &dest, const std::vector<double> &mean, const std::vector<double> &std);
        int Quantize(cv::Mat &source, cv::Mat &dest, const std::vector<double> &scale,
index ae5e66024239f5e1d3d61480bc89285edd4d0fa6..cb7d7c80af822e4fc7b87c7bb1976dd608c00160 100644 (file)
@@ -167,6 +167,14 @@ int InputMetadata::GetPreProcess(JsonObject *root)
                }
        }
 
+       if (json_object_has_member(preprocess_object, "resize")) {
+               const char *resizer = static_cast<const char *>(json_object_get_string_member(preprocess_object, "resize"));
+               if (strcmp(resizer, "LETTERBOX") == 0) {
+                       opt.resizer = Resizer::LETTERBOX;
+                       LOGI("resizer changed to letterbox");
+               }
+       }
+
        option.insert(std::make_pair(iterLayer->first, opt));
 
        LOGI("LEAVE");
@@ -183,12 +191,12 @@ int InputMetadata::Parse(JsonObject *root, std::string key_name)
 
        for (auto idx = 0; idx < json_array_get_length(inputList); ++idx) {
                JsonNode *node = json_array_get_element(inputList, idx);
-           std::string token(json_to_string(node, 1));
+               std::string token(json_to_string(node, 1));
                int pos = token.find(":");
                std::string tensor_name = token.substr(0, pos);
-               const std::vector<char> delimiters = {'{', ' ', ':', '\n', '\"'};
+               const std::vector<char> delimiters = { '{', ' ', ':', '\n', '\"' };
 
-               for (autodelimiter : delimiters)
+               for (auto &delimiter : delimiters)
                        tensor_name.erase(std::remove(tensor_name.begin(), tensor_name.end(), delimiter), tensor_name.end());
 
                if (tensor_name.compare((std::string("tensor") + std::to_string(idx + 1))) != 0) {
index 475753bfe7afdea72a5ac556c7c23b5454153365..b0a17e9a3f4092b5ca909825b0217bb62c385666 100644 (file)
@@ -42,12 +42,28 @@ namespace mediavision
 {
 namespace inference
 {
-int PreProcess::Resize(cv::Mat &source, cv::Mat &dest, cv::Size size)
+int PreProcess::Resize(cv::Mat &source, cv::Mat &dest, cv::Size size, Resizer resizer)
 {
        LOGI("ENTER");
 
        try {
-               cv::resize(source, dest, size);
+               if (source.size() == size) {
+                       dest = source;
+               } else {
+                       if (resizer == Resizer::LETTERBOX) {
+                               double srcW = static_cast<double>(source.size().width);
+                               double srcH = static_cast<double>(source.size().height);
+                               double scale = std::min(1.0, std::min(size.width / srcW, size.height / srcH));
+                               int dstW = static_cast<int>(srcW * scale);
+                               int dstH = static_cast<int>(srcH * scale);
+                               cv::Mat _dest;
+                               cv::resize(source, _dest, cv::Size(dstW, dstH));
+                               dest = cv::Mat(size, source.type(), cv::Scalar(114, 114, 114));
+                               _dest.copyTo(dest(cv::Rect((size.width - dstW) / 2, (size.height - dstH) / 2, dstW, dstH)));
+                       } else {
+                               cv::resize(source, dest, size);
+                       }
+               }
        } catch (cv::Exception &e) {
                LOGE("Fail to resize with msg: %s", e.what());
                return MEDIA_VISION_ERROR_INVALID_OPERATION;
@@ -126,7 +142,7 @@ int PreProcess::Run(cv::Mat &source, const int colorSpace, const int dataType, c
 
        cv::Mat cvSource, cvDest;
        // cvSource has new allocation with dest.size()
-       Resize(source, cvSource, dest.size());
+       Resize(source, cvSource, dest.size(), options.resizer);
 
        // cvDest has new allocation if it's colorSpace is not RGB888
        // cvDest share the data with cvSource it's colorSpace is RGB888