mv_machine_learning: add postprocess support 87/284287/7
authorInki Dae <inki.dae@samsung.com>
Wed, 9 Nov 2022 08:56:20 +0000 (17:56 +0900)
committerInki Dae <inki.dae@samsung.com>
Thu, 17 Nov 2022 04:41:59 +0000 (13:41 +0900)
[Issue type] new feature

Added postprocess support for objectron model of object detection Task
API group.

What this code refactoring does,
 - implement postprocess design for the new meta file approach.
 - add postprocess support to objectron model using this new design.

Change-Id: Ibcf0d9bfffd5dc411ff6b9f44107823021c7cfa6
Signed-off-by: Inki Dae <inki.dae@samsung.com>
18 files changed:
mv_machine_learning/inference/src/Inference.cpp
mv_machine_learning/meta/include/MetaParser.h
mv_machine_learning/meta/include/Postprocess.h [new file with mode: 0644]
mv_machine_learning/meta/include/PostprocessParser.h [new file with mode: 0644]
mv_machine_learning/meta/include/common.h
mv_machine_learning/meta/include/types.h
mv_machine_learning/meta/src/MetaParser.cpp
mv_machine_learning/meta/src/Postprocess.cpp [new file with mode: 0644]
mv_machine_learning/meta/src/PostprocessParser.cpp [new file with mode: 0644]
mv_machine_learning/meta/src/Preprocess.cpp
mv_machine_learning/object_detection/include/ObjectDetectionParser.h
mv_machine_learning/object_detection/include/object_detection.h
mv_machine_learning/object_detection/include/objectron.h
mv_machine_learning/object_detection/src/ObjectDetectionParser.cpp
mv_machine_learning/object_detection/src/mv_object_detection_3d_open.cpp
mv_machine_learning/object_detection/src/object_detection.cpp
mv_machine_learning/object_detection/src/object_detection_adapter.cpp
mv_machine_learning/object_detection/src/objectron.cpp

index 24c4b1f..4a03950 100644 (file)
@@ -339,8 +339,7 @@ int Inference::configureInputMetaInfo(MetaMap &inputMetaInfo)
                        mConfig.mTensorInfo.width = metaInfo->getWidth();
                        mConfig.mTensorInfo.height = metaInfo->getHeight();
 
-                       auto normalization = std::static_pointer_cast<PreprocessInfoNormal>(
-                                       metaInfo->decoding_data[DecodingType::PREPROCESS_NORMAL]);
+                       auto normalization = std::static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap[DecodingType::NORMAL]);
                        if (normalization && normalization->use) {
                                mConfig.mMeanValue = normalization->mean[0];
                                mConfig.mStdValue = normalization->std[0];
index c5cbfd9..6c16328 100644 (file)
@@ -30,11 +30,28 @@ namespace machine_learning {
 
 using MetaMap = std::map<std::string, std::shared_ptr<MetaInfo>>;
 
+/**
+ * @brief A class for parsing a given meta file.
+ */
 class MetaParser
 {
 protected:
        JsonParser *_parser;
+       /**
+        * @brief A meta information map object to input tensors.
+        *        As a key of this object, abstraction tensor name
+        *        - such as tensor1, tensor2, ... - should be used.
+        *        All meta information to each tensor will be filled
+        *        after the completion of load function.
+        */
        MetaMap _inputMetaMap {};
+       /**
+        * @brief A meta information map object to output tensors.
+        *        As a key of this object, abstraction tensor name
+        *        - such as tensor1, tensor2, ... - should be used.
+        *        All meta information to each tensor will be filled
+        *        after the completion of load function.
+        */
        MetaMap _outputMetaMap {};
 
        void parse();
@@ -43,12 +60,22 @@ protected:
                                                 JsonObject *in_obj, std::string key);
        void parsePreprocess(std::shared_ptr<MetaInfo> metaInfo, JsonObject *in_obj);
 
+       /**
+        * @brief parse postprocess node from a given meta file.
+        *        This is a pure virtual function so each derived class
+        *        should implement this function properly.
+        *
+        * @param metaInfo A MetaInfo object to output tensor.
+        * @param in_obj A JsonObject object to postprocess node written in the given meta file.
+        */
        virtual void parsePostprocess(std::shared_ptr<MetaInfo> metaInfo, JsonObject *in_obj) = 0;
 
 public:
        MetaParser();
        virtual ~MetaParser();
-
+       /**
+        * @brief Invoke the parsing work to a given meta file.
+        */
        void load(std::string& meta_file_path);
        MetaMap& getInputMetaMap() { return _inputMetaMap; }
        MetaMap& getOutputMetaMap() { return _outputMetaMap; }
diff --git a/mv_machine_learning/meta/include/Postprocess.h b/mv_machine_learning/meta/include/Postprocess.h
new file mode 100644 (file)
index 0000000..fa5b545
--- /dev/null
@@ -0,0 +1,50 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __POSTPROCESS_H__
+#define __POSTPROCESS_H__
+
+#include "types.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+class Postprocess
+{
+private:
+       InputSizeInfo _sizeInfo {};
+
+       float getScaledWidth() const;
+       float getScaledHeight() const;
+
+public:
+       Postprocess(InputSizeInfo info) : _sizeInfo(info) { }
+       ~Postprocess() = default;
+
+       size_t getScaledX(float input_x) const;
+       size_t getScaledY(float input_y) const;
+
+       /**
+        * Add new postprocess functions here.
+        */
+};
+
+} /* machine_learning */
+} /* mediavision */
+
+#endif /* __POSTPROCESS_H__ */
diff --git a/mv_machine_learning/meta/include/PostprocessParser.h b/mv_machine_learning/meta/include/PostprocessParser.h
new file mode 100644 (file)
index 0000000..e80c8c7
--- /dev/null
@@ -0,0 +1,53 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __POSTPROCESS_PARSER_H__
+#define __POSTPROCESS_PARSER_H__
+
+#include <string>
+#include <map>
+#include <memory>
+#include <vector>
+
+#include <dlog.h>
+#include "mv_private.h"
+#include "mv_common.h"
+
+#include "MetaParser.h"
+#include "types.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+class PostprocessParser
+{
+public:
+       PostprocessParser() = default;
+       ~PostprocessParser() = default;
+
+       void parseBox(std::shared_ptr<MetaInfo> metaInfo, JsonObject *root);
+
+       /**
+        * Add new parsing functions.
+        */
+};
+
+} /* machine_learning */
+} /* mediavision */
+
+#endif /* __POSTPROCESS_PARSER_H__ */
index c35f62f..518ecbf 100644 (file)
 namespace mediavision {
 namespace machine_learning {
 
-std::map<std::string, inference_tensor_shape_type_e> gSupportedShapeType = {
-               { "NCHW", INFERENCE_TENSOR_SHAPE_NCHW },
-               { "NHWC", INFERENCE_TENSOR_SHAPE_NHWC }
-       };
-
-std::map<std::string, mv_inference_data_type_e> gSupportedDataType = {
-       { "FLOAT32", MV_INFERENCE_DATA_FLOAT32 },
-       { "UINT8", MV_INFERENCE_DATA_UINT8 }
-};
-
-std::map<std::string, mv_colorspace_e> gSupportedColorType = {
-       { "RGB888", MEDIA_VISION_COLORSPACE_RGB888 },
-       { "GRAY8", MEDIA_VISION_COLORSPACE_Y800 }
-};
-
 template<typename T, typename U>
 T GetSupportedType(JsonObject *in_obj, std::string key, U& in_map)
 {
@@ -53,7 +38,8 @@ T GetSupportedType(JsonObject *in_obj, std::string key, U& in_map)
                throw mediavision::machine_learning::exception::InvalidParameter("invalid type.");
        }
 
-       LOGI("%s: %d:%s", key.c_str(), supportedType->second, supportedType->first.c_str());
+       LOGI("%s: %d:%s", key.c_str(), static_cast<int>(supportedType->second),
+                                         supportedType->first.c_str());
 
        return supportedType->second;
 }
index b103b30..3d1129b 100644 (file)
@@ -27,63 +27,95 @@ namespace mediavision {
 namespace machine_learning {
 
 enum class DecodingType {
-       PREPROCESS_NORMAL = 1,
-       PREPROCESS_QUAN = 2,
-       POSTPROCESS_BOX = 11,
-       DECODING_INFO_ANCHOR = 31,
-       DECODING_INFO_NMS = 32,
-       DECODING_INFO_ROTATE = 33,
-       DECODING_INFO_ROI = 34
+       NORMAL,
+       QUANTIZATION,
+       BOX,
+       SCORE,
+       LABEL,
+       NUMBER
 };
 
-struct DecodingInfoAnchor {
-
+enum class ScoreType {
+       NORMAL,
+       SIGMOID
 };
 
-struct DecodingInfoNms {
-
+enum class BoxDecodingType {
+       BYPASS,
+       ANCHOR,
+       NMS,
+       ROTATE,
+       ROI,
+       BBOX_3D
 };
 
-struct DecodingInfoRotate {
-
+enum class BoxCoordinateType {
+       RATIO,
+       PIXEL
 };
 
-struct DecodingInfoRoi {
-
+enum class BoxType {
+       LEFTTOP,
+       CENTER
 };
 
-struct DecodingInfo {
-
+struct InputSizeInfo {
+       size_t imageWidth;
+       size_t imageHeight;
+       size_t tensorWidth;
+       size_t tensorHeight;
 };
 
-struct PostprocessInfoBox {
-
+struct DecodingScore {
+       ScoreType type = ScoreType::NORMAL;
 };
 
-struct PostprocessInfo {
-
+struct DecodingBox {
+       BoxType type { BoxType::LEFTTOP };
+       std::vector<unsigned int> order;
+       std::vector<unsigned int> edges;
+       BoxCoordinateType coordinateType { BoxCoordinateType::RATIO };
+       BoxDecodingType decodingType { BoxDecodingType::BYPASS };
+       std::map<BoxDecodingType, std::shared_ptr<void>> decodingInfoMap;
 };
 
-struct PreprocessInfoNormal {
+struct DecodingNormal {
        bool use { false };
        std::vector<double> mean;
        std::vector<double> std;
 };
 
-struct PreprocessInfoQuan {
+struct DecodingQuantization {
        bool use { false };
        std::vector<double> scale;
        std::vector<double> zeropoint;
 };
 
+struct DecodingInfoAnchor {
+
+};
+
+struct DecodingInfoNms {
+
+};
+
+struct DecodingInfoRotate {
+
+};
+
+struct DecodingInfoRoi {
+
+};
+
 struct MetaInfo
 {
        std::string name;
+       std::string tensorName;
        std::vector<int> dims;
        mv_inference_data_type_e dataType {};
        mv_colorspace_e colorSpace {};
        inference_tensor_shape_type_e shapeType {};
-       std::map<DecodingType, std::shared_ptr<void>> decoding_data;
+       std::map<DecodingType, std::shared_ptr<void>> decodingTypeMap;
 
        int getWidth() const
        {
index c43eac9..ab529e9 100644 (file)
@@ -28,6 +28,21 @@ using namespace mediavision::machine_learning::exception;
 namespace mediavision {
 namespace machine_learning {
 
+std::map<std::string, inference_tensor_shape_type_e> gSupportedShapeType = {
+               { "NCHW", INFERENCE_TENSOR_SHAPE_NCHW },
+               { "NHWC", INFERENCE_TENSOR_SHAPE_NHWC }
+       };
+
+std::map<std::string, mv_inference_data_type_e> gSupportedDataType = {
+       { "FLOAT32", MV_INFERENCE_DATA_FLOAT32 },
+       { "UINT8", MV_INFERENCE_DATA_UINT8 }
+};
+
+std::map<std::string, mv_colorspace_e> gSupportedColorType = {
+       { "RGB888", MEDIA_VISION_COLORSPACE_RGB888 },
+       { "GRAY8", MEDIA_VISION_COLORSPACE_Y800 }
+};
+
 MetaParser::MetaParser() : _parser()
 {
        LOGI("ENTER");
@@ -98,6 +113,7 @@ void MetaParser::parseTensorInfo(MetaMap& metaMap,
 
        shared_ptr<MetaInfo> metaInfo = make_shared<MetaInfo>();
 
+       metaInfo->tensorName = key;
        metaInfo->name = static_cast<const char *>(json_object_get_string_member(object, "name"));
        LOGI("layer: %s", metaInfo->name.c_str());
        LOGI("tensor name : %s", json_to_string(node, 0));
@@ -135,8 +151,13 @@ void MetaParser::parseTensorInfo(MetaMap& metaMap,
        if (json_object_has_member(object, "preprocess"))
                parsePreprocess(metaInfo, object);
 
-       if (json_object_has_member(object, "postprocess"))
-               parsePostprocess(metaInfo, object);
+       // if current tensor has postprocess node then parsePostprocess function of
+       // a derived class - which should be implemented in each Task API group directory - of
+       // MetaParser class will be called.
+       if (json_object_has_member(object, "postprocess")) {
+               JsonNode *postprocess_node = json_object_get_member(object, "postprocess");
+               parsePostprocess(metaInfo, json_node_get_object(postprocess_node));
+       }
 
        LOGI("LEAVE");
 }
@@ -182,7 +203,7 @@ void MetaParser::parsePreprocess(shared_ptr<MetaInfo> metaInfo, JsonObject *in_o
        if (json_object_has_member(preprocess_object, "normalization")) {
                JsonNode *node = json_object_get_member(preprocess_object, "normalization");
                JsonObject *object = json_node_get_object(node);
-               shared_ptr<PreprocessInfoNormal> normalization = make_shared<PreprocessInfoNormal>();
+               auto normalization = make_shared<DecodingNormal>();
 
                normalization->use = true;
                LOGI("use normalization");
@@ -202,13 +223,13 @@ void MetaParser::parsePreprocess(shared_ptr<MetaInfo> metaInfo, JsonObject *in_o
                        LOGI("%u: mean[%3.2f], std[%3.2f]", elem, m, s);
                }
 
-               metaInfo->decoding_data[DecodingType::PREPROCESS_NORMAL] = static_pointer_cast<void>(normalization);
+               metaInfo->decodingTypeMap[DecodingType::NORMAL] = static_pointer_cast<void>(normalization);
        }
 
        if (json_object_has_member(preprocess_object, "quantization")) {
                JsonNode *node = json_object_get_member(preprocess_object, "quantization");
                JsonObject *object = json_node_get_object(node);
-               shared_ptr<PreprocessInfoQuan> quantization = make_shared<PreprocessInfoQuan>();
+               shared_ptr<DecodingQuantization> quantization = make_shared<DecodingQuantization>();
 
                quantization->use = true;
                LOGI("use quantization");
@@ -228,7 +249,7 @@ void MetaParser::parsePreprocess(shared_ptr<MetaInfo> metaInfo, JsonObject *in_o
                        LOGI("%u: scale[%3.2f], zeropoint[%3.2f]", elem, s, z);
                }
 
-               metaInfo->decoding_data[DecodingType::PREPROCESS_QUAN] = static_pointer_cast<void>(quantization);
+               metaInfo->decodingTypeMap[DecodingType::QUANTIZATION] = static_pointer_cast<void>(quantization);
        }
 
        LOGI("LEAVE");
diff --git a/mv_machine_learning/meta/src/Postprocess.cpp b/mv_machine_learning/meta/src/Postprocess.cpp
new file mode 100644 (file)
index 0000000..1a67c3f
--- /dev/null
@@ -0,0 +1,48 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "Postprocess.h"
+
+using namespace std;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+float Postprocess::getScaledWidth() const
+{
+       return static_cast<float>(_sizeInfo.imageWidth) / static_cast<float>(_sizeInfo.tensorWidth);
+}
+
+float Postprocess::getScaledHeight() const
+{
+       return static_cast<float>(_sizeInfo.imageHeight) / static_cast<float>(_sizeInfo.tensorHeight);
+}
+
+size_t Postprocess::getScaledX(float input_x) const
+{
+       return static_cast<size_t>(input_x * getScaledWidth());
+}
+
+size_t Postprocess::getScaledY(float input_y) const
+{
+       return static_cast<size_t>(input_y * getScaledHeight());
+}
+
+} /* machine_learning */
+} /* mediavision */
diff --git a/mv_machine_learning/meta/src/PostprocessParser.cpp b/mv_machine_learning/meta/src/PostprocessParser.cpp
new file mode 100644 (file)
index 0000000..bbe07a5
--- /dev/null
@@ -0,0 +1,120 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <unistd.h>
+#include <string>
+#include <queue>
+#include <algorithm>
+#include "machine_learning_exception.h"
+#include "PostprocessParser.h"
+#include "common.h"
+
+using namespace std;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+std::map<std::string, BoxType> gSupportedBoxTypes = {
+       { "ORIGIN_LEFTTOP", BoxType::LEFTTOP },
+       { "ORIGIN_CENTER", BoxType::CENTER }
+};
+
+std::map<std::string, BoxCoordinateType> gSupportedBoxCoordinateTypes = {
+       { "RATIO", BoxCoordinateType::RATIO },
+       { "PIXEL", BoxCoordinateType::PIXEL }
+};
+
+std::map<std::string, BoxDecodingType> gSupportedBoxDecodingTypes = {
+       { "BYPASS", BoxDecodingType::BYPASS },
+       { "SSD_ANCHOR", BoxDecodingType::ANCHOR },
+       { "YOLO_ANCHOR", BoxDecodingType::ANCHOR },
+       { "3D", BoxDecodingType::BBOX_3D }
+};
+
+/**
+ * Function template.
+ *
+ * void PostprocessParser::parseNodeName(shared_ptr<MetaInfo> metaInfo, JsonObject *root)
+ * {
+ *     if (!json_object_has_member(root, "NodeName"))
+ *         throw InvalidOperation("member NodeName not exists");
+ *
+ *     shared_ptr<NodeName> decodingNodeName = make_shared<NodeName>();
+ *        JsonObject *object = json_object_get_object_member(root, "NodeName");
+ *
+ *     [Parse nodes for a given NodeName in the meta file here]
+ *
+ *     metaInfo->decodingTypeMap[DecodingType::NodeName] = decodingNodeName;
+ * }
+ */
+
+void PostprocessParser::parseBox(shared_ptr<MetaInfo> metaInfo, JsonObject *root)
+{
+       LOGI("ENTER");
+
+       if (!json_object_has_member(root, "box"))
+               throw InvalidOperation("member box not exists");
+
+       shared_ptr<DecodingBox> decodingBox = make_shared<DecodingBox>();
+       JsonObject *object = json_object_get_object_member(root, "box");
+
+       try {
+               if (json_object_has_member(object, "box_type"))
+                       decodingBox->type = GetSupportedType<BoxType,
+                                               map<string, BoxType>>(object, "box_type", gSupportedBoxTypes);
+
+               if (json_object_has_member(object, "box_coordinate"))
+                       decodingBox->coordinateType = GetSupportedType<BoxCoordinateType,
+                                               map<string, BoxCoordinateType>>(object, "box_coordinate", gSupportedBoxCoordinateTypes);
+
+               if (json_object_has_member(object, "decoding_type"))
+                       decodingBox->decodingType = GetSupportedType<BoxDecodingType,
+                                               map<string, BoxDecodingType>>(object, "decoding_type", gSupportedBoxDecodingTypes);
+       } catch (const std::exception &e) {
+               LOGE("%s", e.what());
+               throw InvalidOperation("Invalid box meta information.");
+       }
+
+       // In case of bypass, we don't need to parse decoding_info.
+       if (decodingBox->decodingType == BoxDecodingType::BYPASS)
+               return;
+
+       if (!json_object_has_member(object, "decoding_info"))
+               throw InvalidOperation("decoding_info node is needed.");
+
+       JsonObject *decoding_info_obj = json_object_get_object_member(object, "decoding_info");
+
+       if (decodingBox->decodingType == BoxDecodingType::BBOX_3D) {
+               JsonArray *array = json_object_get_array_member(decoding_info_obj, "edges");
+               unsigned int elements = json_array_get_length(array);
+
+               for (unsigned int idx = 0; idx < elements; ++idx) {
+                       auto val = static_cast<int>(json_array_get_int_element(array, idx));
+                       decodingBox->edges.push_back(val);
+                       LOGI("%d", val);
+               }
+       }
+
+       metaInfo->decodingTypeMap[DecodingType::BOX] = decodingBox;
+
+       LOGI("LEAVE");
+}
+
+} /* machine_learning */
+} /* mediavision */
index b5ebb0e..7b54412 100644 (file)
@@ -186,7 +186,7 @@ void Preprocess::run(vector<mv_source_h>& mv_srcs, MetaMap& tensorMetaInfo,
                cvDest.convertTo(dest, dest.type());
 
                try {
-                       auto normalization = static_pointer_cast<PreprocessInfoNormal>(metaInfo->decoding_data.at(DecodingType::PREPROCESS_NORMAL));
+                       auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
 
                        if (normalization && normalization->use)
                                normalize(dest, dest, normalization->mean, normalization->std);
@@ -195,7 +195,7 @@ void Preprocess::run(vector<mv_source_h>& mv_srcs, MetaMap& tensorMetaInfo,
                }
 
                try {
-                       auto quantization = static_pointer_cast<PreprocessInfoQuan>(metaInfo->decoding_data.at(DecodingType::PREPROCESS_QUAN));
+                       auto quantization = static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
 
                        if (quantization && quantization->use)
                                quantize(dest, dest, quantization->scale, quantization->zeropoint);
index 237c4bf..0baae5d 100644 (file)
 #define __OBJECT_DETECTION_PARSER_H__
 
 #include "MetaParser.h"
+#include "PostprocessParser.h"
 
 namespace mediavision {
 namespace machine_learning {
 
 class ObjectDetectionParser : public MetaParser
 {
+private:
+       PostprocessParser _postprocessParser;
+
 protected:
        void parsePostprocess(std::shared_ptr<MetaInfo> meta_info, JsonObject *in_obj) override;
 
index 024285b..ad44026 100644 (file)
@@ -26,6 +26,7 @@
 #include "Inference.h"
 #include "object_detection_type.h"
 #include "ObjectDetectionParser.h"
+#include "Preprocess.h"
 
 namespace mediavision
 {
@@ -38,6 +39,7 @@ protected:
        std::unique_ptr<mediavision::inference::Inference> _inference;
        std::unique_ptr<MediaVision::Common::EngineConfig> _config;
        std::unique_ptr<MetaParser> _parser;
+       Preprocess _preprocess;
        std::string _modelFilePath;
        std::string _modelMetaFilePath;
        int _backendType;
@@ -49,7 +51,7 @@ public:
        virtual void parseMetaFile() = 0;
        void configure();
        void prepare();
-       virtual void preprocess(std::vector<mv_source_h>& mv_srcs);
+       void preprocess(mv_source_h& mv_src);
        void inference(mv_source_h source);
        virtual object_detection_3d_result_s& getResult() = 0;
 };
index f642e4e..1f2ebb0 100644 (file)
@@ -24,7 +24,6 @@
 #include "object_detection.h"
 #include <mv_inference_type.h>
 #include "EngineConfig.h"
-#include "Preprocess.h"
 
 namespace mediavision
 {
@@ -35,13 +34,11 @@ class Objectron : public ObjectDetection
 {
 private:
        object_detection_3d_result_s _result;
-       Preprocess _preprocess;
 
 public:
        Objectron();
        ~Objectron();
        void parseMetaFile() override;
-       void preprocess(std::vector<mv_source_h>& mv_srcs) override;
        object_detection_3d_result_s& getResult() override;
 };
 
index 5e4d27b..8ea451e 100644 (file)
@@ -37,6 +37,12 @@ ObjectDetectionParser::~ObjectDetectionParser()
 void ObjectDetectionParser::parsePostprocess(shared_ptr<MetaInfo> meta_info, JsonObject *in_obj)
 {
        LOGI("ENTER");
+
+       LOGI("tensor name : %s", meta_info->name.c_str());
+
+       if (json_object_has_member(in_obj, "box"))
+               _postprocessParser.parseBox(meta_info, in_obj);
+
        LOGI("LEAVE");
 }
 
index 7d360d0..dcc55a9 100644 (file)
@@ -233,6 +233,9 @@ int mv_object_detection_3d_get_points_open(mv_object_detection_3d_h handle, unsi
 
                *out_x = result.x_vec.data();
                *out_y = result.y_vec.data();
+
+               for (auto& edge : result.edge_index_vec)
+                       LOGI("%d,%d ", edge.start, edge.end);
        } catch (const BaseException &e) {
                LOGE("%s", e.what());
                return e.getError();
index d1ba6f8..9fd706e 100644 (file)
@@ -60,11 +60,15 @@ void ObjectDetection::prepare()
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to load model files.");
 }
-void ObjectDetection::preprocess(vector<mv_source_h>& mv_srcs)
+void ObjectDetection::preprocess(mv_source_h& mv_src)
 {
        LOGI("ENTER");
 
-       preprocess(mv_srcs);
+       TensorBuffer& tensor_buffer_obj = _inference->getInputTensorBuffer();
+       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
+       vector<mv_source_h> mv_srcs = { mv_src };
+
+       _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
 
        LOGI("LEAVE");
 }
@@ -76,7 +80,6 @@ void ObjectDetection::inference(mv_source_h source)
        vector<mv_source_h> sources;
 
        sources.push_back(source);
-       preprocess(sources);
 
        int ret = _inference->Run();
        if (ret != MEDIA_VISION_ERROR_NONE)
index 7d95585..fc16729 100644 (file)
@@ -74,6 +74,7 @@ template<typename T, typename V> void ObjectDetectionAdapter<T, V>::setInput(T &
 template<typename T, typename V> void ObjectDetectionAdapter<T, V>::perform()
 {
        try {
+               _object_detection->preprocess(_source.inference_src);
                _object_detection->inference(_source.inference_src);
        } catch (const BaseException &e) {
                throw e;
index 58e0f07..ad61584 100644 (file)
@@ -21,6 +21,7 @@
 #include "machine_learning_exception.h"
 #include "objectron.h"
 #include "mv_object_detection_3d_config.h"
+#include "Postprocess.h"
 
 using namespace std;
 using namespace mediavision::inference;
@@ -78,18 +79,6 @@ void Objectron::parseMetaFile()
        _parser->load(_modelMetaFilePath);
 }
 
-void Objectron::preprocess(vector<mv_source_h>& mv_srcs)
-{
-       LOGI("ENTER");
-
-       TensorBuffer& tensor_buffer_obj = _inference->getInputTensorBuffer();
-       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
-
-       _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
-
-       LOGI("LEAVE");
-}
-
 object_detection_3d_result_s& Objectron::getResult()
 {
        TensorBuffer& tensor_buffer_obj = _inference->GetOutputTensorBuffer();
@@ -112,17 +101,15 @@ object_detection_3d_result_s& Objectron::getResult()
        if (output_size != 18)
                throw InvalidOperation("Invalid number of points. Number of points should be 18.");
 
-       float x_scale = static_cast<float>(_preprocess.getImageWidth()[0]) /
-                                       static_cast<float>(_inference->getInputWidth());
-       float y_scale = static_cast<float>(_preprocess.getImageHeight()[0]) /
-                                       static_cast<float>(_inference->getInputHeight());
+       Postprocess postprocess({_preprocess.getImageWidth()[0], _preprocess.getImageHeight()[0],
+                                                         _inference->getInputWidth(), _inference->getInputHeight()});
 
        _result.x_vec.clear();
        _result.y_vec.clear();
 
        for (unsigned int idx = 0; idx < output_size; idx += 2) {
-               _result.x_vec.push_back(static_cast<int>(keypoints[idx] * x_scale));
-               _result.y_vec.push_back(static_cast<int>(keypoints[idx + 1] * y_scale));
+               _result.x_vec.push_back(postprocess.getScaledX(keypoints[idx]));
+               _result.y_vec.push_back(postprocess.getScaledY(keypoints[idx + 1]));
        }
 
        _result.number_of_points = output_size / 2;
@@ -137,14 +124,17 @@ object_detection_3d_result_s& Objectron::getResult()
 
        _result.probability = static_cast<unsigned int>(prob[0] * 100);
 
-       const vector<edge_index_s> defaultEdges {
-               {2, 3}, {4, 5}, {6, 7}, {8, 9},
-               {2, 4}, {3, 5}, {6, 8}, {7, 9},
-               {2, 6}, {3, 7}, {4, 8}, {5, 9}
-       };
+       try {
+               auto metaInfo = _parser->getOutputMetaMap()["Identity_1"];
+               auto decodingBox = static_pointer_cast<DecodingBox>(metaInfo->decodingTypeMap[DecodingType::BOX]);
 
-       _result.edge_index_vec = defaultEdges;
-       _result.number_of_edges = defaultEdges.size();
+               for (auto idx = 0; idx < decodingBox->edges.size(); idx += 2)
+                       _result.edge_index_vec.push_back({ decodingBox->edges[idx], decodingBox->edges[idx + 1] });
+
+               _result.number_of_edges = decodingBox->edges.size();
+       } catch (const std::exception& e) {
+               throw InvalidOperation("Invalid meta info access.");
+       }
 
        return _result;
 }