mv_machine_learning: add postprocess support 87/284287/7
authorInki Dae <inki.dae@samsung.com>
Wed, 9 Nov 2022 08:56:20 +0000 (17:56 +0900)
committerInki Dae <inki.dae@samsung.com>
Thu, 17 Nov 2022 04:41:59 +0000 (13:41 +0900)
[Issue type] new feature

Added postprocess support for objectron model of object detection Task
API group.

What this code refactoring does,
 - implement postprocess design for the new meta file approach.
 - add postprocess support to objectron model using this new design.

Change-Id: Ibcf0d9bfffd5dc411ff6b9f44107823021c7cfa6
Signed-off-by: Inki Dae <inki.dae@samsung.com>
18 files changed:
mv_machine_learning/inference/src/Inference.cpp
mv_machine_learning/meta/include/MetaParser.h
mv_machine_learning/meta/include/Postprocess.h [new file with mode: 0644]
mv_machine_learning/meta/include/PostprocessParser.h [new file with mode: 0644]
mv_machine_learning/meta/include/common.h
mv_machine_learning/meta/include/types.h
mv_machine_learning/meta/src/MetaParser.cpp
mv_machine_learning/meta/src/Postprocess.cpp [new file with mode: 0644]
mv_machine_learning/meta/src/PostprocessParser.cpp [new file with mode: 0644]
mv_machine_learning/meta/src/Preprocess.cpp
mv_machine_learning/object_detection/include/ObjectDetectionParser.h
mv_machine_learning/object_detection/include/object_detection.h
mv_machine_learning/object_detection/include/objectron.h
mv_machine_learning/object_detection/src/ObjectDetectionParser.cpp
mv_machine_learning/object_detection/src/mv_object_detection_3d_open.cpp
mv_machine_learning/object_detection/src/object_detection.cpp
mv_machine_learning/object_detection/src/object_detection_adapter.cpp
mv_machine_learning/object_detection/src/objectron.cpp

index 24c4b1f..4a03950 100644 (file)
@@ -339,8 +339,7 @@ int Inference::configureInputMetaInfo(MetaMap &inputMetaInfo)
                        mConfig.mTensorInfo.width = metaInfo->getWidth();
                        mConfig.mTensorInfo.height = metaInfo->getHeight();
 
-                       auto normalization = std::static_pointer_cast<PreprocessInfoNormal>(
-                                       metaInfo->decoding_data[DecodingType::PREPROCESS_NORMAL]);
+                       auto normalization = std::static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap[DecodingType::NORMAL]);
                        if (normalization && normalization->use) {
                                mConfig.mMeanValue = normalization->mean[0];
                                mConfig.mStdValue = normalization->std[0];
index c5cbfd9..6c16328 100644 (file)
@@ -30,11 +30,28 @@ namespace machine_learning {
 
 using MetaMap = std::map<std::string, std::shared_ptr<MetaInfo>>;
 
+/**
+ * @brief A class for parsing a given meta file.
+ */
 class MetaParser
 {
 protected:
        JsonParser *_parser;
+       /**
+        * @brief A meta information map object to input tensors.
+        *        As a key of this object, abstraction tensor name
+        *        - such as tensor1, tensor2, ... - should be used.
+        *        All meta information to each tensor will be filled
+        *        after the completion of load function.
+        */
        MetaMap _inputMetaMap {};
+       /**
+        * @brief A meta information map object to output tensors.
+        *        As a key of this object, abstraction tensor name
+        *        - such as tensor1, tensor2, ... - should be used.
+        *        All meta information to each tensor will be filled
+        *        after the completion of load function.
+        */
        MetaMap _outputMetaMap {};
 
        void parse();
@@ -43,12 +60,22 @@ protected:
                                                 JsonObject *in_obj, std::string key);
        void parsePreprocess(std::shared_ptr<MetaInfo> metaInfo, JsonObject *in_obj);
 
+       /**
+        * @brief parse postprocess node from a given meta file.
+        *        This is a pure virtual function so each derived class
+        *        should implement this function properly.
+        *
+        * @param metaInfo A MetaInfo object to output tensor.
+        * @param in_obj A JsonObject object to postprocess node written in the given meta file.
+        */
        virtual void parsePostprocess(std::shared_ptr<MetaInfo> metaInfo, JsonObject *in_obj) = 0;
 
 public:
        MetaParser();
        virtual ~MetaParser();
-
+       /**
+        * @brief Invoke the parsing work to a given meta file.
+        */
        void load(std::string& meta_file_path);
        MetaMap& getInputMetaMap() { return _inputMetaMap; }
        MetaMap& getOutputMetaMap() { return _outputMetaMap; }
diff --git a/mv_machine_learning/meta/include/Postprocess.h b/mv_machine_learning/meta/include/Postprocess.h
new file mode 100644 (file)
index 0000000..fa5b545
--- /dev/null
@@ -0,0 +1,50 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __POSTPROCESS_H__
+#define __POSTPROCESS_H__
+
+#include "types.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+class Postprocess
+{
+private:
+       InputSizeInfo _sizeInfo {};
+
+       float getScaledWidth() const;
+       float getScaledHeight() const;
+
+public:
+       Postprocess(InputSizeInfo info) : _sizeInfo(info) { }
+       ~Postprocess() = default;
+
+       size_t getScaledX(float input_x) const;
+       size_t getScaledY(float input_y) const;
+
+       /**
+        * Add new postprocess functions here.
+        */
+};
+
+} /* machine_learning */
+} /* mediavision */
+
+#endif /* __POSTPROCESS_H__ */
diff --git a/mv_machine_learning/meta/include/PostprocessParser.h b/mv_machine_learning/meta/include/PostprocessParser.h
new file mode 100644 (file)
index 0000000..e80c8c7
--- /dev/null
@@ -0,0 +1,53 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __POSTPROCESS_PARSER_H__
+#define __POSTPROCESS_PARSER_H__
+
+#include <string>
+#include <map>
+#include <memory>
+#include <vector>
+
+#include <dlog.h>
+#include "mv_private.h"
+#include "mv_common.h"
+
+#include "MetaParser.h"
+#include "types.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+class PostprocessParser
+{
+public:
+       PostprocessParser() = default;
+       ~PostprocessParser() = default;
+
+       void parseBox(std::shared_ptr<MetaInfo> metaInfo, JsonObject *root);
+
+       /**
+        * Add new parsing functions.
+        */
+};
+
+} /* machine_learning */
+} /* mediavision */
+
+#endif /* __POSTPROCESS_PARSER_H__ */
index c35f62f..518ecbf 100644 (file)
 namespace mediavision {
 namespace machine_learning {
 
-std::map<std::string, inference_tensor_shape_type_e> gSupportedShapeType = {
-               { "NCHW", INFERENCE_TENSOR_SHAPE_NCHW },
-               { "NHWC", INFERENCE_TENSOR_SHAPE_NHWC }
-       };
-
-std::map<std::string, mv_inference_data_type_e> gSupportedDataType = {
-       { "FLOAT32", MV_INFERENCE_DATA_FLOAT32 },
-       { "UINT8", MV_INFERENCE_DATA_UINT8 }
-};
-
-std::map<std::string, mv_colorspace_e> gSupportedColorType = {
-       { "RGB888", MEDIA_VISION_COLORSPACE_RGB888 },
-       { "GRAY8", MEDIA_VISION_COLORSPACE_Y800 }
-};
-
 template<typename T, typename U>
 T GetSupportedType(JsonObject *in_obj, std::string key, U& in_map)
 {
@@ -53,7 +38,8 @@ T GetSupportedType(JsonObject *in_obj, std::string key, U& in_map)
                throw mediavision::machine_learning::exception::InvalidParameter("invalid type.");
        }
 
-       LOGI("%s: %d:%s", key.c_str(), supportedType->second, supportedType->first.c_str());
+       LOGI("%s: %d:%s", key.c_str(), static_cast<int>(supportedType->second),
+                                         supportedType->first.c_str());
 
        return supportedType->second;
 }
index b103b30..3d1129b 100644 (file)
@@ -27,63 +27,95 @@ namespace mediavision {
 namespace machine_learning {
 
 enum class DecodingType {
-       PREPROCESS_NORMAL = 1,
-       PREPROCESS_QUAN = 2,
-       POSTPROCESS_BOX = 11,
-       DECODING_INFO_ANCHOR = 31,
-       DECODING_INFO_NMS = 32,
-       DECODING_INFO_ROTATE = 33,
-       DECODING_INFO_ROI = 34
+       NORMAL,
+       QUANTIZATION,
+       BOX,
+       SCORE,
+       LABEL,
+       NUMBER
 };
 
-struct DecodingInfoAnchor {
-
+enum class ScoreType {
+       NORMAL,
+       SIGMOID
 };
 
-struct DecodingInfoNms {
-
+enum class BoxDecodingType {
+       BYPASS,
+       ANCHOR,
+       NMS,
+       ROTATE,
+       ROI,
+       BBOX_3D
 };
 
-struct DecodingInfoRotate {
-
+enum class BoxCoordinateType {
+       RATIO,
+       PIXEL
 };
 
-struct DecodingInfoRoi {
-
+enum class BoxType {
+       LEFTTOP,
+       CENTER
 };
 
-struct DecodingInfo {
-
+struct InputSizeInfo {
+       size_t imageWidth;
+       size_t imageHeight;
+       size_t tensorWidth;
+       size_t tensorHeight;
 };
 
-struct PostprocessInfoBox {
-
+struct DecodingScore {
+       ScoreType type = ScoreType::NORMAL;
 };
 
-struct PostprocessInfo {
-
+struct DecodingBox {
+       BoxType type { BoxType::LEFTTOP };
+       std::vector<unsigned int> order;
+       std::vector<unsigned int> edges;
+       BoxCoordinateType coordinateType { BoxCoordinateType::RATIO };
+       BoxDecodingType decodingType { BoxDecodingType::BYPASS };
+       std::map<BoxDecodingType, std::shared_ptr<void>> decodingInfoMap;
 };
 
-struct PreprocessInfoNormal {
+struct DecodingNormal {
        bool use { false };
        std::vector<double> mean;
        std::vector<double> std;
 };
 
-struct PreprocessInfoQuan {
+struct DecodingQuantization {
        bool use { false };
        std::vector<double> scale;
        std::vector<double> zeropoint;
 };
 
+struct DecodingInfoAnchor {
+
+};
+
+struct DecodingInfoNms {
+
+};
+
+struct DecodingInfoRotate {
+
+};
+
+struct DecodingInfoRoi {
+
+};
+
 struct MetaInfo
 {
        std::string name;
+       std::string tensorName;
        std::vector<int> dims;
        mv_inference_data_type_e dataType {};
        mv_colorspace_e colorSpace {};
        inference_tensor_shape_type_e shapeType {};
-       std::map<DecodingType, std::shared_ptr<void>> decoding_data;
+       std::map<DecodingType, std::shared_ptr<void>> decodingTypeMap;
 
        int getWidth() const
        {
index c43eac9..ab529e9 100644 (file)
@@ -28,6 +28,21 @@ using namespace mediavision::machine_learning::exception;
 namespace mediavision {
 namespace machine_learning {
 
+std::map<std::string, inference_tensor_shape_type_e> gSupportedShapeType = {
+               { "NCHW", INFERENCE_TENSOR_SHAPE_NCHW },
+               { "NHWC", INFERENCE_TENSOR_SHAPE_NHWC }
+       };
+
+std::map<std::string, mv_inference_data_type_e> gSupportedDataType = {
+       { "FLOAT32", MV_INFERENCE_DATA_FLOAT32 },
+       { "UINT8", MV_INFERENCE_DATA_UINT8 }
+};
+
+std::map<std::string, mv_colorspace_e> gSupportedColorType = {
+       { "RGB888", MEDIA_VISION_COLORSPACE_RGB888 },
+       { "GRAY8", MEDIA_VISION_COLORSPACE_Y800 }
+};
+
 MetaParser::MetaParser() : _parser()
 {
        LOGI("ENTER");
@@ -98,6 +113,7 @@ void MetaParser::parseTensorInfo(MetaMap& metaMap,
 
        shared_ptr<MetaInfo> metaInfo = make_shared<MetaInfo>();
 
+       metaInfo->tensorName = key;
        metaInfo->name = static_cast<const char *>(json_object_get_string_member(object, "name"));
        LOGI("layer: %s", metaInfo->name.c_str());
        LOGI("tensor name : %s", json_to_string(node, 0));
@@ -135,8 +151,13 @@ void MetaParser::parseTensorInfo(MetaMap& metaMap,
        if (json_object_has_member(object, "preprocess"))
                parsePreprocess(metaInfo, object);
 
-       if (json_object_has_member(object, "postprocess"))
-               parsePostprocess(metaInfo, object);
+       // if current tensor has postprocess node then parsePostprocess function of
+       // a derived class - which should be implemented in each Task API group directory - of
+       // MetaParser class will be called.
+       if (json_object_has_member(object, "postprocess")) {
+               JsonNode *postprocess_node = json_object_get_member(object, "postprocess");
+               parsePostprocess(metaInfo, json_node_get_object(postprocess_node));
+       }
 
        LOGI("LEAVE");
 }
@@ -182,7 +203,7 @@ void MetaParser::parsePreprocess(shared_ptr<MetaInfo> metaInfo, JsonObject *in_o
        if (json_object_has_member(preprocess_object, "normalization")) {
                JsonNode *node = json_object_get_member(preprocess_object, "normalization");
                JsonObject *object = json_node_get_object(node);
-               shared_ptr<PreprocessInfoNormal> normalization = make_shared<PreprocessInfoNormal>();
+               auto normalization = make_shared<DecodingNormal>();
 
                normalization->use = true;
                LOGI("use normalization");
@@ -202,13 +223,13 @@ void MetaParser::parsePreprocess(shared_ptr<MetaInfo> metaInfo, JsonObject *in_o
                        LOGI("%u: mean[%3.2f], std[%3.2f]", elem, m, s);
                }
 
-               metaInfo->decoding_data[DecodingType::PREPROCESS_NORMAL] = static_pointer_cast<void>(normalization);
+               metaInfo->decodingTypeMap[DecodingType::NORMAL] = static_pointer_cast<void>(normalization);
        }
 
        if (json_object_has_member(preprocess_object, "quantization")) {
                JsonNode *node = json_object_get_member(preprocess_object, "quantization");
                JsonObject *object = json_node_get_object(node);
-               shared_ptr<PreprocessInfoQuan> quantization = make_shared<PreprocessInfoQuan>();
+               shared_ptr<DecodingQuantization> quantization = make_shared<DecodingQuantization>();
 
                quantization->use = true;
                LOGI("use quantization");
@@ -228,7 +249,7 @@ void MetaParser::parsePreprocess(shared_ptr<MetaInfo> metaInfo, JsonObject *in_o
                        LOGI("%u: scale[%3.2f], zeropoint[%3.2f]", elem, s, z);
                }
 
-               metaInfo->decoding_data[DecodingType::PREPROCESS_QUAN] = static_pointer_cast<void>(quantization);
+               metaInfo->decodingTypeMap[DecodingType::QUANTIZATION] = static_pointer_cast<void>(quantization);
        }
 
        LOGI("LEAVE");
diff --git a/mv_machine_learning/meta/src/Postprocess.cpp b/mv_machine_learning/meta/src/Postprocess.cpp
new file mode 100644 (file)
index 0000000..1a67c3f
--- /dev/null
@@ -0,0 +1,48 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "Postprocess.h"
+
+using namespace std;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+float Postprocess::getScaledWidth() const
+{
+       return static_cast<float>(_sizeInfo.imageWidth) / static_cast<float>(_sizeInfo.tensorWidth);
+}
+
+float Postprocess::getScaledHeight() const
+{
+       return static_cast<float>(_sizeInfo.imageHeight) / static_cast<float>(_sizeInfo.tensorHeight);
+}
+
+size_t Postprocess::getScaledX(float input_x) const
+{
+       return static_cast<size_t>(input_x * getScaledWidth());
+}
+
+size_t Postprocess::getScaledY(float input_y) const
+{
+       return static_cast<size_t>(input_y * getScaledHeight());
+}
+
+} /* machine_learning */
+} /* mediavision */
diff --git a/mv_machine_learning/meta/src/PostprocessParser.cpp b/mv_machine_learning/meta/src/PostprocessParser.cpp
new file mode 100644 (file)
index 0000000..bbe07a5
--- /dev/null
@@ -0,0 +1,120 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <unistd.h>
+#include <string>
+#include <queue>
+#include <algorithm>
+#include "machine_learning_exception.h"
+#include "PostprocessParser.h"
+#include "common.h"
+
+using namespace std;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+std::map<std::string, BoxType> gSupportedBoxTypes = {
+       { "ORIGIN_LEFTTOP", BoxType::LEFTTOP },
+       { "ORIGIN_CENTER", BoxType::CENTER }
+};
+
+std::map<std::string, BoxCoordinateType> gSupportedBoxCoordinateTypes = {
+       { "RATIO", BoxCoordinateType::RATIO },
+       { "PIXEL", BoxCoordinateType::PIXEL }
+};
+
+std::map<std::string, BoxDecodingType> gSupportedBoxDecodingTypes = {
+       { "BYPASS", BoxDecodingType::BYPASS },
+       { "SSD_ANCHOR", BoxDecodingType::ANCHOR },
+       { "YOLO_ANCHOR", BoxDecodingType::ANCHOR },
+       { "3D", BoxDecodingType::BBOX_3D }
+};
+
+/**
+ * Function template.
+ *
+ * void PostprocessParser::parseNodeName(shared_ptr<MetaInfo> metaInfo, JsonObject *root)
+ * {
+ *     if (!json_object_has_member(root, "NodeName"))
+ *         throw InvalidOperation("member NodeName not exists");
+ *
+ *     shared_ptr<NodeName> decodingNodeName = make_shared<NodeName>();
+ *        JsonObject *object = json_object_get_object_member(root, "NodeName");
+ *
+ *     [Parse nodes for a given NodeName in the meta file here]
+ *
+ *     metaInfo->decodingTypeMap[DecodingType::NodeName] = decodingNodeName;
+ * }
+ */
+
+void PostprocessParser::parseBox(shared_ptr<MetaInfo> metaInfo, JsonObject *root)
+{
+       LOGI("ENTER");
+
+       if (!json_object_has_member(root, "box"))
+               throw InvalidOperation("member box not exists");
+
+       shared_ptr<DecodingBox> decodingBox = make_shared<DecodingBox>();
+       JsonObject *object = json_object_get_object_member(root, "box");
+
+       try {
+               if (json_object_has_member(object, "box_type"))
+                       decodingBox->type = GetSupportedType<BoxType,
+                                               map<string, BoxType>>(object, "box_type", gSupportedBoxTypes);
+
+               if (json_object_has_member(object, "box_coordinate"))
+                       decodingBox->coordinateType = GetSupportedType<BoxCoordinateType,
+                                               map<string, BoxCoordinateType>>(object, "box_coordinate", gSupportedBoxCoordinateTypes);
+
+               if (json_object_has_member(object, "decoding_type"))
+                       decodingBox->decodingType = GetSupportedType<BoxDecodingType,
+                                               map<string, BoxDecodingType>>(object, "decoding_type", gSupportedBoxDecodingTypes);
+       } catch (const std::exception &e) {
+               LOGE("%s", e.what());
+               throw InvalidOperation("Invalid box meta information.");
+       }
+
+       // In case of bypass, we don't need to parse decoding_info.
+       if (decodingBox->decodingType == BoxDecodingType::BYPASS)
+               return;
+
+       if (!json_object_has_member(object, "decoding_info"))
+               throw InvalidOperation("decoding_info node is needed.");
+
+       JsonObject *decoding_info_obj = json_object_get_object_member(object, "decoding_info");
+
+       if (decodingBox->decodingType == BoxDecodingType::BBOX_3D) {
+               JsonArray *array = json_object_get_array_member(decoding_info_obj, "edges");
+               unsigned int elements = json_array_get_length(array);
+
+               for (unsigned int idx = 0; idx < elements; ++idx) {
+                       auto val = static_cast<int>(json_array_get_int_element(array, idx));
+                       decodingBox->edges.push_back(val);
+                       LOGI("%d", val);
+               }
+       }
+
+       metaInfo->decodingTypeMap[DecodingType::BOX] = decodingBox;
+
+       LOGI("LEAVE");
+}
+
+} /* machine_learning */
+} /* mediavision */
index b5ebb0e..7b54412 100644 (file)
@@ -186,7 +186,7 @@ void Preprocess::run(vector<mv_source_h>& mv_srcs, MetaMap& tensorMetaInfo,
                cvDest.convertTo(dest, dest.type());
 
                try {
-                       auto normalization = static_pointer_cast<PreprocessInfoNormal>(metaInfo->decoding_data.at(DecodingType::PREPROCESS_NORMAL));
+                       auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
 
                        if (normalization && normalization->use)
                                normalize(dest, dest, normalization->mean, normalization->std);
@@ -195,7 +195,7 @@ void Preprocess::run(vector<mv_source_h>& mv_srcs, MetaMap& tensorMetaInfo,
                }
 
                try {
-                       auto quantization = static_pointer_cast<PreprocessInfoQuan>(metaInfo->decoding_data.at(DecodingType::PREPROCESS_QUAN));
+                       auto quantization = static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
 
                        if (quantization && quantization->use)
                                quantize(dest, dest, quantization->scale, quantization->zeropoint);
index 237c4bf..0baae5d 100644 (file)
 #define __OBJECT_DETECTION_PARSER_H__
 
 #include "MetaParser.h"
+#include "PostprocessParser.h"
 
 namespace mediavision {
 namespace machine_learning {
 
 class ObjectDetectionParser : public MetaParser
 {
+private:
+       PostprocessParser _postprocessParser;
+
 protected:
        void parsePostprocess(std::shared_ptr<MetaInfo> meta_info, JsonObject *in_obj) override;
 
index 024285b..ad44026 100644 (file)
@@ -26,6 +26,7 @@
 #include "Inference.h"
 #include "object_detection_type.h"
 #include "ObjectDetectionParser.h"
+#include "Preprocess.h"
 
 namespace mediavision
 {
@@ -38,6 +39,7 @@ protected:
        std::unique_ptr<mediavision::inference::Inference> _inference;
        std::unique_ptr<MediaVision::Common::EngineConfig> _config;
        std::unique_ptr<MetaParser> _parser;
+       Preprocess _preprocess;
        std::string _modelFilePath;
        std::string _modelMetaFilePath;
        int _backendType;
@@ -49,7 +51,7 @@ public:
        virtual void parseMetaFile() = 0;
        void configure();
        void prepare();
-       virtual void preprocess(std::vector<mv_source_h>& mv_srcs);
+       void preprocess(mv_source_h& mv_src);
        void inference(mv_source_h source);
        virtual object_detection_3d_result_s& getResult() = 0;
 };
index f642e4e..1f2ebb0 100644 (file)
@@ -24,7 +24,6 @@
 #include "object_detection.h"
 #include <mv_inference_type.h>
 #include "EngineConfig.h"
-#include "Preprocess.h"
 
 namespace mediavision
 {
@@ -35,13 +34,11 @@ class Objectron : public ObjectDetection
 {
 private:
        object_detection_3d_result_s _result;
-       Preprocess _preprocess;
 
 public:
        Objectron();
        ~Objectron();
        void parseMetaFile() override;
-       void preprocess(std::vector<mv_source_h>& mv_srcs) override;
        object_detection_3d_result_s& getResult() override;
 };
 
index 5e4d27b..8ea451e 100644 (file)
@@ -37,6 +37,12 @@ ObjectDetectionParser::~ObjectDetectionParser()
 void ObjectDetectionParser::parsePostprocess(shared_ptr<MetaInfo> meta_info, JsonObject *in_obj)
 {
        LOGI("ENTER");
+
+       LOGI("tensor name : %s", meta_info->name.c_str());
+
+       if (json_object_has_member(in_obj, "box"))
+               _postprocessParser.parseBox(meta_info, in_obj);
+
        LOGI("LEAVE");
 }
 
index 7d360d0..dcc55a9 100644 (file)
@@ -233,6 +233,9 @@ int mv_object_detection_3d_get_points_open(mv_object_detection_3d_h handle, unsi
 
                *out_x = result.x_vec.data();
                *out_y = result.y_vec.data();
+
+               for (auto& edge : result.edge_index_vec)
+                       LOGI("%d,%d ", edge.start, edge.end);
        } catch (const BaseException &e) {
                LOGE("%s", e.what());
                return e.getError();
index d1ba6f8..9fd706e 100644 (file)
@@ -60,11 +60,15 @@ void ObjectDetection::prepare()
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to load model files.");
 }
-void ObjectDetection::preprocess(vector<mv_source_h>& mv_srcs)
+void ObjectDetection::preprocess(mv_source_h& mv_src)
 {
        LOGI("ENTER");
 
-       preprocess(mv_srcs);
+       TensorBuffer& tensor_buffer_obj = _inference->getInputTensorBuffer();
+       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
+       vector<mv_source_h> mv_srcs = { mv_src };
+
+       _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
 
        LOGI("LEAVE");
 }
@@ -76,7 +80,6 @@ void ObjectDetection::inference(mv_source_h source)
        vector<mv_source_h> sources;
 
        sources.push_back(source);
-       preprocess(sources);
 
        int ret = _inference->Run();
        if (ret != MEDIA_VISION_ERROR_NONE)
index 7d95585..fc16729 100644 (file)
@@ -74,6 +74,7 @@ template<typename T, typename V> void ObjectDetectionAdapter<T, V>::setInput(T &
 template<typename T, typename V> void ObjectDetectionAdapter<T, V>::perform()
 {
        try {
+               _object_detection->preprocess(_source.inference_src);
                _object_detection->inference(_source.inference_src);
        } catch (const BaseException &e) {
                throw e;
index 58e0f07..ad61584 100644 (file)
@@ -21,6 +21,7 @@
 #include "machine_learning_exception.h"
 #include "objectron.h"
 #include "mv_object_detection_3d_config.h"
+#include "Postprocess.h"
 
 using namespace std;
 using namespace mediavision::inference;
@@ -78,18 +79,6 @@ void Objectron::parseMetaFile()
        _parser->load(_modelMetaFilePath);
 }
 
-void Objectron::preprocess(vector<mv_source_h>& mv_srcs)
-{
-       LOGI("ENTER");
-
-       TensorBuffer& tensor_buffer_obj = _inference->getInputTensorBuffer();
-       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
-
-       _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
-
-       LOGI("LEAVE");
-}
-
 object_detection_3d_result_s& Objectron::getResult()
 {
        TensorBuffer& tensor_buffer_obj = _inference->GetOutputTensorBuffer();
@@ -112,17 +101,15 @@ object_detection_3d_result_s& Objectron::getResult()
        if (output_size != 18)
                throw InvalidOperation("Invalid number of points. Number of points should be 18.");
 
-       float x_scale = static_cast<float>(_preprocess.getImageWidth()[0]) /
-                                       static_cast<float>(_inference->getInputWidth());
-       float y_scale = static_cast<float>(_preprocess.getImageHeight()[0]) /
-                                       static_cast<float>(_inference->getInputHeight());
+       Postprocess postprocess({_preprocess.getImageWidth()[0], _preprocess.getImageHeight()[0],
+                                                         _inference->getInputWidth(), _inference->getInputHeight()});
 
        _result.x_vec.clear();
        _result.y_vec.clear();
 
        for (unsigned int idx = 0; idx < output_size; idx += 2) {
-               _result.x_vec.push_back(static_cast<int>(keypoints[idx] * x_scale));
-               _result.y_vec.push_back(static_cast<int>(keypoints[idx + 1] * y_scale));
+               _result.x_vec.push_back(postprocess.getScaledX(keypoints[idx]));
+               _result.y_vec.push_back(postprocess.getScaledY(keypoints[idx + 1]));
        }
 
        _result.number_of_points = output_size / 2;
@@ -137,14 +124,17 @@ object_detection_3d_result_s& Objectron::getResult()
 
        _result.probability = static_cast<unsigned int>(prob[0] * 100);
 
-       const vector<edge_index_s> defaultEdges {
-               {2, 3}, {4, 5}, {6, 7}, {8, 9},
-               {2, 4}, {3, 5}, {6, 8}, {7, 9},
-               {2, 6}, {3, 7}, {4, 8}, {5, 9}
-       };
+       try {
+               auto metaInfo = _parser->getOutputMetaMap()["Identity_1"];
+               auto decodingBox = static_pointer_cast<DecodingBox>(metaInfo->decodingTypeMap[DecodingType::BOX]);
 
-       _result.edge_index_vec = defaultEdges;
-       _result.number_of_edges = defaultEdges.size();
+               for (auto idx = 0; idx < decodingBox->edges.size(); idx += 2)
+                       _result.edge_index_vec.push_back({ decodingBox->edges[idx], decodingBox->edges[idx + 1] });
+
+               _result.number_of_edges = decodingBox->edges.size();
+       } catch (const std::exception& e) {
+               throw InvalidOperation("Invalid meta info access.");
+       }
 
        return _result;
 }