Add postprocess for letterbox 46/284246/2
author	Kwanghoon Son <k.son@samsung.com>
	Mon, 14 Nov 2022 07:25:40 +0000 (02:25 -0500)
committer	Kwanghoon Son <k.son@samsung.com>
	Mon, 14 Nov 2022 09:46:27 +0000 (04:46 -0500)
The letterbox resizer needs postprocessing to map object result locations back to the source image (see the sketch below).
Also fix some code indentation.

[Issue type] New

Change-Id: Ief7720f7a2260277e96aba5dc66a58c78e475794
Signed-off-by: Kwanghoon Son <k.son@samsung.com>
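
A minimal, self-contained sketch of the letterbox inverse mapping that the GetObjectDetectionResults() hunk below applies. The helper name toSourceRect, the PixelRect struct, and the normalized center/size inputs are illustrative assumptions, not part of this patch; the patched code works directly on the decoder box and cv::Rect.

#include <algorithm>

// Illustrative only: map a box decoded in normalized center/size form
// (cx, cy, w, h in [0, 1]) against the letterboxed network input of size
// dstW x dstH back to pixel coordinates in the original srcW x srcH image.
struct PixelRect { int x, y, width, height; };

static PixelRect toSourceRect(double cx, double cy, double w, double h,
                              double srcW, double srcH, double dstW, double dstH)
{
	// Letterboxing scales the source by a single factor and centers it with
	// padding, so the inverse removes the padding and divides by the scale.
	double scale = std::min(1.0, std::min(dstW / srcW, dstH / srcH));
	double padX = (dstW - scale * srcW) / 2.0;
	double padY = (dstH - scale * srcH) / 2.0;

	// Top-left corner of the box in letterboxed pixels.
	double left = (cx - w * 0.5) * dstW;
	double top = (cy - h * 0.5) * dstH;

	PixelRect r;
	r.x = static_cast<int>(std::clamp((left - padX) / scale, 0.0, srcW));
	r.y = static_cast<int>(std::clamp((top - padY) / scale, 0.0, srcH));
	r.width = static_cast<int>(w * dstW / scale);
	r.height = static_cast<int>(h * dstH / scale);

	// Clip so the rectangle never extends past the source image, matching
	// the width/height adjustment in the patched code.
	r.width = std::min(r.width, static_cast<int>(srcW) - r.x);
	r.height = std::min(r.height, static_cast<int>(srcH) - r.y);
	return r;
}

Note: the patched code also adds the pad size onto width/height before clipping; the sketch keeps only the scale-and-clip step for clarity.
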
mv_machine_learning/inference/src/Inference.cpp

index 14673a2..24c4b1f 100644
@@ -261,7 +261,7 @@ void Inference::ConfigureModelFiles(const std::string modelConfigFilePath, const
 }
 
 int Inference::ConfigureInputInfo(int width, int height, int dim, int ch, double stdValue, double meanValue,
-                                                                  int dataType, const std::vector<std::string> names)
+                                                                 int dataType, const std::vector<std::string> names)
 {
        LOGI("ENTER");
 
@@ -322,7 +322,7 @@ int Inference::configureInputMetaInfo()
        return ret;
 }
 
-int Inference::configureInputMetaInfo(MetaMap& inputMetaInfo)
+int Inference::configureInputMetaInfo(MetaMap &inputMetaInfo)
 {
        LOGI("ENTER");
 
@@ -331,7 +331,7 @@ int Inference::configureInputMetaInfo(MetaMap& inputMetaInfo)
        mConfig.mInputLayerNames.clear();
 
        try {
-               for (auto& meta : inputMetaInfo) {
+               for (auto &meta : inputMetaInfo) {
                        std::shared_ptr<MetaInfo> metaInfo = meta.second;
 
                        mConfig.mTensorInfo.ch = metaInfo->getChannel();
@@ -339,7 +339,8 @@ int Inference::configureInputMetaInfo(MetaMap& inputMetaInfo)
                        mConfig.mTensorInfo.width = metaInfo->getWidth();
                        mConfig.mTensorInfo.height = metaInfo->getHeight();
 
-                       auto normalization = std::static_pointer_cast<PreprocessInfoNormal>(metaInfo->decoding_data[DecodingType::PREPROCESS_NORMAL]);
+                       auto normalization = std::static_pointer_cast<PreprocessInfoNormal>(
+                                       metaInfo->decoding_data[DecodingType::PREPROCESS_NORMAL]);
                        if (normalization && normalization->use) {
                                mConfig.mMeanValue = normalization->mean[0];
                                mConfig.mStdValue = normalization->std[0];
@@ -348,7 +349,7 @@ int Inference::configureInputMetaInfo(MetaMap& inputMetaInfo)
                        mConfig.mDataType = metaInfo->dataType;
                        mConfig.mInputLayerNames.push_back(meta.first);
                }
-       } catch (const std::exception& e) {
+       } catch (const std::exception &e) {
                LOGE("Fail to configure input meta info.");
                return MEDIA_VISION_ERROR_INVALID_OPERATION;
        }
@@ -360,7 +361,6 @@ int Inference::configureInputMetaInfo(MetaMap& inputMetaInfo)
        return ret;
 }
 
-
 int Inference::setInputInfo()
 {
        LOGI("ENTER");
@@ -405,7 +405,8 @@ int Inference::setInputInfo()
        return ret;
 }
 
-int Inference::ConfigureOutputInfo(const std::vector<std::string> names, std::vector<inference_engine_tensor_info> &tensors_info)
+int Inference::ConfigureOutputInfo(const std::vector<std::string> names,
+                                                                  std::vector<inference_engine_tensor_info> &tensors_info)
 {
        LOGI("ENTER");
 
@@ -418,18 +419,18 @@ int Inference::ConfigureOutputInfo(const std::vector<std::string> names, std::ve
                                                                                                         INFERENCE_TENSOR_DATA_TYPE_FLOAT32, 1 };
 
                for (auto &name : mConfig.mOutputLayerNames) {
-                               LOGI("Configure %s layer as output", name.c_str());
-                               property.layers.insert(std::make_pair(name, tensor_info));
+                       LOGI("Configure %s layer as output", name.c_str());
+                       property.layers.insert(std::make_pair(name, tensor_info));
                }
        } else {
                if (mConfig.mOutputLayerNames.size() != tensors_info.size()) {
-                               LOGE("Output layer count is different from tensor info count.");
-                               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+                       LOGE("Output layer count is different from tensor info count.");
+                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
                }
 
                for (size_t idx = 0; idx < mConfig.mOutputLayerNames.size(); ++idx) {
-                               LOGI("Configure %s layer as output", mConfig.mOutputLayerNames[idx].c_str());
-                               property.layers.insert(std::make_pair(mConfig.mOutputLayerNames[idx], tensors_info[idx]));
+                       LOGI("Configure %s layer as output", mConfig.mOutputLayerNames[idx].c_str());
+                       property.layers.insert(std::make_pair(mConfig.mOutputLayerNames[idx], tensors_info[idx]));
                }
        }
 
@@ -449,17 +450,17 @@ int Inference::configureOutputMetaInfo()
        mConfig.mOutputLayerNames.clear();
 
        if (!outputMeta._tensor_info.empty()) {
-               for (auto& info : outputMeta._tensor_info)
+               for (auto &info : outputMeta._tensor_info)
                        mConfig.mOutputLayerNames.push_back(info.first);
        }
 
        inference_engine_tensor_info tensor_info = { std::vector<size_t> { 1 }, INFERENCE_TENSOR_SHAPE_NCHW,
-                                                                                                       INFERENCE_TENSOR_DATA_TYPE_FLOAT32, 1 };
+                                                                                                INFERENCE_TENSOR_DATA_TYPE_FLOAT32, 1 };
        inference_engine_layer_property property;
 
        for (auto &name : mConfig.mOutputLayerNames) {
-                       LOGI("Configure %s layer as output", name.c_str());
-                       property.layers.insert(std::make_pair(name, tensor_info));
+               LOGI("Configure %s layer as output", name.c_str());
+               property.layers.insert(std::make_pair(name, tensor_info));
        }
 
        int ret = setOutputInfo(property);
@@ -469,20 +470,20 @@ int Inference::configureOutputMetaInfo()
        return ret;
 }
 
-int Inference::configureOutputMetaInfo(MetaMap& outputMetaInfo)
+int Inference::configureOutputMetaInfo(MetaMap &outputMetaInfo)
 {
        LOGI("ENTER");
 
        mConfig.mOutputLayerNames.clear();
 
        try {
-               for (auto& meta : outputMetaInfo) {
-                       std::shared_ptr<MetaInfo>& metaInfo = meta.second;
+               for (auto &meta : outputMetaInfo) {
+                       std::shared_ptr<MetaInfo> &metaInfo = meta.second;
 
                        mConfig.mDataType = metaInfo->dataType;
                        mConfig.mOutputLayerNames.push_back(meta.first);
                }
-       } catch (const std::exception& e) {
+       } catch (const std::exception &e) {
                LOGE("Fail to configure output meta info.");
                return MEDIA_VISION_ERROR_INVALID_OPERATION;
        }
@@ -503,7 +504,7 @@ int Inference::configureOutputMetaInfo(MetaMap& outputMetaInfo)
        return ret;
 }
 
-int Inference::setOutputInfo(inference_engine_layer_property& property)
+int Inference::setOutputInfo(inference_engine_layer_property &property)
 {
        LOGI("ENTER");
 
@@ -1215,17 +1216,39 @@ int Inference::GetObjectDetectionResults(ObjectDetectionResults *results)
                objDecoder.decode();
                results->number_of_objects = 0;
 
+               auto &rLoc = results->locations;
+
                for (auto &box : objDecoder.getObjectAll()) {
                        results->indices.push_back(box.index);
                        results->names.push_back(mUserListName[box.index]);
                        results->confidences.push_back(box.score);
-                       results->locations.push_back(
-                                       cv::Rect(static_cast<int>((box.location.x - box.location.width * 0.5f) *
-                                                                                         static_cast<float>(mSourceSize.width)),
-                                                        static_cast<int>((box.location.y - box.location.height * 0.5f) *
-                                                                                         static_cast<float>(mSourceSize.height)),
-                                                        static_cast<int>(box.location.width * static_cast<float>(mSourceSize.width)),
-                                                        static_cast<int>(box.location.height * static_cast<float>(mSourceSize.height))));
+                       auto &bLoc = box.location;
+
+                       auto srcW = static_cast<double>(mSourceSize.width);
+                       auto srcH = static_cast<double>(mSourceSize.height);
+
+                       auto halfW = (bLoc.x - bLoc.width * 0.5f);
+                       auto halfH = (bLoc.y - bLoc.height * 0.5f);
+
+                       if (mMetadata.GetInputMeta().option.begin()->second.resizer == Resizer::LETTERBOX) {
+                               double dstW = static_cast<double>(mMetadata.GetInputMeta().layer.begin()->second.getWidth());
+                               double dstH = static_cast<double>(mMetadata.GetInputMeta().layer.begin()->second.getHeight());
+                               double scale = std::min(1.0, std::min(dstW / srcW, dstH / srcH));
+                               double padSize[] = { (dstW - (scale * srcW)) / 2.0, (dstH - (scale * srcH)) / 2.0 };
+
+                               auto rect =
+                                               cv::Rect(static_cast<int>(std::min(srcW, std::max((halfW * dstW - padSize[0]) / scale, 0.0))),
+                                                                static_cast<int>(std::min(srcH, std::max((halfH * dstH - padSize[1]) / scale, 0.0))),
+                                                                static_cast<int>((bLoc.width * dstW) / scale + padSize[0]),
+                                                                static_cast<int>((bLoc.height * dstH) / scale + padSize[1]));
+
+                               rect.width = (rect.x + rect.width) > srcW ? srcW - rect.x : rect.width;
+                               rect.height = (rect.y + rect.height) > srcH ? srcH - rect.y : rect.height;
+
+                               rLoc.push_back(rect);
+                       } else {
+                               rLoc.push_back(cv::Rect(halfW * srcW, halfH * srcH, bLoc.width * srcW, bLoc.height * srcH));
+                       }
                        results->number_of_objects++;
                }