Add postprocess for letterbox 46/284246/2
author	Kwanghoon Son <k.son@samsung.com>
	Mon, 14 Nov 2022 07:25:40 +0000 (02:25 -0500)
committer	Kwanghoon Son <k.son@samsung.com>
	Mon, 14 Nov 2022 09:46:27 +0000 (04:46 -0500)
The letterbox resizer needs postprocessing to map object result locations back to the source image (see the sketch below).
Also fix some code indentation.

[Issue type] New

Change-Id: Ief7720f7a2260277e96aba5dc66a58c78e475794
Signed-off-by: Kwanghoon Son <k.son@samsung.com>
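
A minimal, self-contained sketch of the letterbox inverse mapping that the GetObjectDetectionResults() hunk below applies. The helper name toSourceRect, the PixelRect struct, and the normalized center/size inputs are illustrative assumptions, not part of this patch; the patched code works directly on the decoder box and cv::Rect.

#include <algorithm>

// Illustrative only: map a box decoded in normalized center/size form
// (cx, cy, w, h in [0, 1]) against the letterboxed network input of size
// dstW x dstH back to pixel coordinates in the original srcW x srcH image.
struct PixelRect { int x, y, width, height; };

static PixelRect toSourceRect(double cx, double cy, double w, double h,
                              double srcW, double srcH, double dstW, double dstH)
{
	// Letterboxing scales the source by a single factor and centers it with
	// padding, so the inverse removes the padding and divides by the scale.
	double scale = std::min(1.0, std::min(dstW / srcW, dstH / srcH));
	double padX = (dstW - scale * srcW) / 2.0;
	double padY = (dstH - scale * srcH) / 2.0;

	// Top-left corner of the box in letterboxed pixels.
	double left = (cx - w * 0.5) * dstW;
	double top = (cy - h * 0.5) * dstH;

	PixelRect r;
	r.x = static_cast<int>(std::clamp((left - padX) / scale, 0.0, srcW));
	r.y = static_cast<int>(std::clamp((top - padY) / scale, 0.0, srcH));
	r.width = static_cast<int>(w * dstW / scale);
	r.height = static_cast<int>(h * dstH / scale);

	// Clip so the rectangle never extends past the source image, matching
	// the width/height adjustment in the patched code.
	r.width = std::min(r.width, static_cast<int>(srcW) - r.x);
	r.height = std::min(r.height, static_cast<int>(srcH) - r.y);
	return r;
}

Note: the patched code also adds the pad size onto width/height before clipping; the sketch keeps only the scale-and-clip step for clarity.
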
mv_machine_learning/inference/src/Inference.cpp

index 14673a2..24c4b1f 100644
@@ -261,7 +261,7 @@ void Inference::ConfigureModelFiles(const std::string modelConfigFilePath, const
 }
 
 int Inference::ConfigureInputInfo(int width, int height, int dim, int ch, double stdValue, double meanValue,
-                                                                  int dataType, const std::vector<std::string> names)
+                                                                 int dataType, const std::vector<std::string> names)
 {
        LOGI("ENTER");
 
@@ -322,7 +322,7 @@ int Inference::configureInputMetaInfo()
        return ret;
 }
 
-int Inference::configureInputMetaInfo(MetaMap& inputMetaInfo)
+int Inference::configureInputMetaInfo(MetaMap &inputMetaInfo)
 {
        LOGI("ENTER");
 
@@ -331,7 +331,7 @@ int Inference::configureInputMetaInfo(MetaMap& inputMetaInfo)
        mConfig.mInputLayerNames.clear();
 
        try {
-               for (auto& meta : inputMetaInfo) {
+               for (auto &meta : inputMetaInfo) {
                        std::shared_ptr<MetaInfo> metaInfo = meta.second;
 
                        mConfig.mTensorInfo.ch = metaInfo->getChannel();
@@ -339,7 +339,8 @@ int Inference::configureInputMetaInfo(MetaMap& inputMetaInfo)
                        mConfig.mTensorInfo.width = metaInfo->getWidth();
                        mConfig.mTensorInfo.height = metaInfo->getHeight();
 
-                       auto normalization = std::static_pointer_cast<PreprocessInfoNormal>(metaInfo->decoding_data[DecodingType::PREPROCESS_NORMAL]);
+                       auto normalization = std::static_pointer_cast<PreprocessInfoNormal>(
+                                       metaInfo->decoding_data[DecodingType::PREPROCESS_NORMAL]);
                        if (normalization && normalization->use) {
                                mConfig.mMeanValue = normalization->mean[0];
                                mConfig.mStdValue = normalization->std[0];
@@ -348,7 +349,7 @@ int Inference::configureInputMetaInfo(MetaMap& inputMetaInfo)
                        mConfig.mDataType = metaInfo->dataType;
                        mConfig.mInputLayerNames.push_back(meta.first);
                }
-       } catch (const std::exception& e) {
+       } catch (const std::exception &e) {
                LOGE("Fail to configure input meta info.");
                return MEDIA_VISION_ERROR_INVALID_OPERATION;
        }
@@ -360,7 +361,6 @@ int Inference::configureInputMetaInfo(MetaMap& inputMetaInfo)
        return ret;
 }
 
-
 int Inference::setInputInfo()
 {
        LOGI("ENTER");
@@ -405,7 +405,8 @@ int Inference::setInputInfo()
        return ret;
 }
 
-int Inference::ConfigureOutputInfo(const std::vector<std::string> names, std::vector<inference_engine_tensor_info> &tensors_info)
+int Inference::ConfigureOutputInfo(const std::vector<std::string> names,
+                                                                  std::vector<inference_engine_tensor_info> &tensors_info)
 {
        LOGI("ENTER");
 
@@ -418,18 +419,18 @@ int Inference::ConfigureOutputInfo(const std::vector<std::string> names, std::ve
                                                                                                         INFERENCE_TENSOR_DATA_TYPE_FLOAT32, 1 };
 
                for (auto &name : mConfig.mOutputLayerNames) {
-                               LOGI("Configure %s layer as output", name.c_str());
-                               property.layers.insert(std::make_pair(name, tensor_info));
+                       LOGI("Configure %s layer as output", name.c_str());
+                       property.layers.insert(std::make_pair(name, tensor_info));
                }
        } else {
                if (mConfig.mOutputLayerNames.size() != tensors_info.size()) {
-                               LOGE("Output layer count is different from tensor info count.");
-                               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+                       LOGE("Output layer count is different from tensor info count.");
+                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
                }
 
                for (size_t idx = 0; idx < mConfig.mOutputLayerNames.size(); ++idx) {
-                               LOGI("Configure %s layer as output", mConfig.mOutputLayerNames[idx].c_str());
-                               property.layers.insert(std::make_pair(mConfig.mOutputLayerNames[idx], tensors_info[idx]));
+                       LOGI("Configure %s layer as output", mConfig.mOutputLayerNames[idx].c_str());
+                       property.layers.insert(std::make_pair(mConfig.mOutputLayerNames[idx], tensors_info[idx]));
                }
        }
 
@@ -449,17 +450,17 @@ int Inference::configureOutputMetaInfo()
        mConfig.mOutputLayerNames.clear();
 
        if (!outputMeta._tensor_info.empty()) {
-               for (auto& info : outputMeta._tensor_info)
+               for (auto &info : outputMeta._tensor_info)
                        mConfig.mOutputLayerNames.push_back(info.first);
        }
 
        inference_engine_tensor_info tensor_info = { std::vector<size_t> { 1 }, INFERENCE_TENSOR_SHAPE_NCHW,
-                                                                                                       INFERENCE_TENSOR_DATA_TYPE_FLOAT32, 1 };
+                                                                                                INFERENCE_TENSOR_DATA_TYPE_FLOAT32, 1 };
        inference_engine_layer_property property;
 
        for (auto &name : mConfig.mOutputLayerNames) {
-                       LOGI("Configure %s layer as output", name.c_str());
-                       property.layers.insert(std::make_pair(name, tensor_info));
+               LOGI("Configure %s layer as output", name.c_str());
+               property.layers.insert(std::make_pair(name, tensor_info));
        }
 
        int ret = setOutputInfo(property);
@@ -469,20 +470,20 @@ int Inference::configureOutputMetaInfo()
        return ret;
 }
 
-int Inference::configureOutputMetaInfo(MetaMap& outputMetaInfo)
+int Inference::configureOutputMetaInfo(MetaMap &outputMetaInfo)
 {
        LOGI("ENTER");
 
        mConfig.mOutputLayerNames.clear();
 
        try {
-               for (auto& meta : outputMetaInfo) {
-                       std::shared_ptr<MetaInfo>& metaInfo = meta.second;
+               for (auto &meta : outputMetaInfo) {
+                       std::shared_ptr<MetaInfo> &metaInfo = meta.second;
 
                        mConfig.mDataType = metaInfo->dataType;
                        mConfig.mOutputLayerNames.push_back(meta.first);
                }
-       } catch (const std::exception& e) {
+       } catch (const std::exception &e) {
                LOGE("Fail to configure output meta info.");
                return MEDIA_VISION_ERROR_INVALID_OPERATION;
        }
@@ -503,7 +504,7 @@ int Inference::configureOutputMetaInfo(MetaMap& outputMetaInfo)
        return ret;
 }
 
-int Inference::setOutputInfo(inference_engine_layer_property& property)
+int Inference::setOutputInfo(inference_engine_layer_property &property)
 {
        LOGI("ENTER");
 
@@ -1215,17 +1216,39 @@ int Inference::GetObjectDetectionResults(ObjectDetectionResults *results)
                objDecoder.decode();
                results->number_of_objects = 0;
 
+               auto &rLoc = results->locations;
+
                for (auto &box : objDecoder.getObjectAll()) {
                        results->indices.push_back(box.index);
                        results->names.push_back(mUserListName[box.index]);
                        results->confidences.push_back(box.score);
-                       results->locations.push_back(
-                                       cv::Rect(static_cast<int>((box.location.x - box.location.width * 0.5f) *
-                                                                                         static_cast<float>(mSourceSize.width)),
-                                                        static_cast<int>((box.location.y - box.location.height * 0.5f) *
-                                                                                         static_cast<float>(mSourceSize.height)),
-                                                        static_cast<int>(box.location.width * static_cast<float>(mSourceSize.width)),
-                                                        static_cast<int>(box.location.height * static_cast<float>(mSourceSize.height))));
+                       auto &bLoc = box.location;
+
+                       auto srcW = static_cast<double>(mSourceSize.width);
+                       auto srcH = static_cast<double>(mSourceSize.height);
+
+                       auto halfW = (bLoc.x - bLoc.width * 0.5f);
+                       auto halfH = (bLoc.y - bLoc.height * 0.5f);
+
+                       if (mMetadata.GetInputMeta().option.begin()->second.resizer == Resizer::LETTERBOX) {
+                               double dstW = static_cast<double>(mMetadata.GetInputMeta().layer.begin()->second.getWidth());
+                               double dstH = static_cast<double>(mMetadata.GetInputMeta().layer.begin()->second.getHeight());
+                               double scale = std::min(1.0, std::min(dstW / srcW, dstH / srcH));
+                               double padSize[] = { (dstW - (scale * srcW)) / 2.0, (dstH - (scale * srcH)) / 2.0 };
+
+                               auto rect =
+                                               cv::Rect(static_cast<int>(std::min(srcW, std::max((halfW * dstW - padSize[0]) / scale, 0.0))),
+                                                                static_cast<int>(std::min(srcH, std::max((halfH * dstH - padSize[1]) / scale, 0.0))),
+                                                                static_cast<int>((bLoc.width * dstW) / scale + padSize[0]),
+                                                                static_cast<int>((bLoc.height * dstH) / scale + padSize[1]));
+
+                               rect.width = (rect.x + rect.width) > srcW ? srcW - rect.x : rect.width;
+                               rect.height = (rect.y + rect.height) > srcH ? srcH - rect.y : rect.height;
+
+                               rLoc.push_back(rect);
+                       } else {
+                               rLoc.push_back(cv::Rect(halfW * srcW, halfH * srcH, bLoc.width * srcW, bLoc.height * srcH));
+                       }
                        results->number_of_objects++;
                }