mv_machine_learning: drop meta approach dependency from preprocess
author: Inki Dae <inki.dae@samsung.com>
Thu, 20 Jul 2023 06:12:33 +0000 (15:12 +0900)
committer: Kwanghoon Son <k.son@samsung.com>
Mon, 7 Aug 2023 04:25:06 +0000 (13:25 +0900)
[Issue type] : code cleanup

Drop meta approach dependency from Preprocess class.

As for this, this patch introduces a new member function of the Preprocess
class, setConfig. This function sets user-desired configuration to
the Preprocess object so that preprocessing operation can be performed with
the configuration. This is required for external plugin such as
ObjectDetectionExternal class to use the Preprocess class to preprocess
a given input data because the external plugin can use its own approach
instead of Mediavision's one.

Change-Id: I3600ed06335f196da3885daca0c2da85c931cd6d
Signed-off-by: Inki Dae <inki.dae@samsung.com>
mv_machine_learning/face_recognition/src/facenet.cpp
mv_machine_learning/image_classification/src/image_classification.cpp
mv_machine_learning/landmark_detection/src/landmark_detection.cpp
mv_machine_learning/meta/include/Preprocess.h
mv_machine_learning/meta/src/Preprocess.cpp
mv_machine_learning/object_detection/src/object_detection.cpp
mv_machine_learning/object_detection_3d/src/object_detection_3d.cpp

index 7551b4ca7e833cb03c52ef83e650ae15bc5c6b6f..ed85b72e244e7036784d57e56b38e562bd6f82b5 100644 (file)
@@ -131,7 +131,30 @@ void Facenet::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vec
 {
        LOGI("ENTER");
 
-       _preprocess.run<T>(mv_src, metaInfo, inputVector);
+       PreprocessConfig config = { false,
+                                                               metaInfo->colorSpace,
+                                                               metaInfo->dataType,
+                                                               metaInfo->getChannel(),
+                                                               metaInfo->getWidth(),
+                                                               metaInfo->getHeight() };
+
+       auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
+       if (normalization) {
+               config.normalize = normalization->use;
+               config.mean = normalization->mean;
+               config.std = normalization->std;
+       }
+
+       auto quantization =
+                       static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
+       if (quantization) {
+               config.quantize = quantization->use;
+               config.scale = quantization->scale;
+               config.zeropoint = quantization->zeropoint;
+       }
+
+       _preprocess.setConfig(config);
+       _preprocess.run<T>(mv_src, inputVector);
 
        LOGI("LEAVE");
 }
index 3638fcdc128168b6ec08470a30c64b3bbbf4438e..21544d7c0abe3c59b5a32bf55565e7af50a272e8 100644 (file)
@@ -278,7 +278,30 @@ void ImageClassification::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> m
 {
        LOGI("ENTER");
 
-       _preprocess.run<T>(mv_src, metaInfo, inputVector);
+       PreprocessConfig config = { false,
+                                                               metaInfo->colorSpace,
+                                                               metaInfo->dataType,
+                                                               metaInfo->getChannel(),
+                                                               metaInfo->getWidth(),
+                                                               metaInfo->getHeight() };
+
+       auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
+       if (normalization) {
+               config.normalize = normalization->use;
+               config.mean = normalization->mean;
+               config.std = normalization->std;
+       }
+
+       auto quantization =
+                       static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
+       if (quantization) {
+               config.quantize = quantization->use;
+               config.scale = quantization->scale;
+               config.zeropoint = quantization->zeropoint;
+       }
+
+       _preprocess.setConfig(config);
+       _preprocess.run<T>(mv_src, inputVector);
 
        LOGI("LEAVE");
 }
index bb8377082eed1a6f9cff64141a8e2e118acc5da2..3aa4d20ca657a57d9d617a0e2d1c233117f6febf 100644 (file)
@@ -283,7 +283,30 @@ void LandmarkDetection::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> met
 {
        LOGI("ENTER");
 
-       _preprocess.run<T>(mv_src, metaInfo, inputVector);
+       PreprocessConfig config = { false,
+                                                               metaInfo->colorSpace,
+                                                               metaInfo->dataType,
+                                                               metaInfo->getChannel(),
+                                                               metaInfo->getWidth(),
+                                                               metaInfo->getHeight() };
+
+       auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
+       if (normalization) {
+               config.normalize = normalization->use;
+               config.mean = normalization->mean;
+               config.std = normalization->std;
+       }
+
+       auto quantization =
+                       static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
+       if (quantization) {
+               config.quantize = quantization->use;
+               config.scale = quantization->scale;
+               config.zeropoint = quantization->zeropoint;
+       }
+
+       _preprocess.setConfig(config);
+       _preprocess.run<T>(mv_src, inputVector);
 
        LOGI("LEAVE");
 }
index 7479a9ded7c428a940ee08a843281d142f84cc7b..4ef3dd07df07e55dfcd491bbde7932bb870e5cdc 100644 (file)
 #include <memory>
 #include <vector>
 
+#include <inference_engine_type.h>
+#include <mv_inference_type.h>
+
 #include "mv_private.h"
 #include "mv_common.h"
-#include "TensorBuffer.h"
 
 #include <opencv2/core.hpp>
 #include <opencv2/imgproc.hpp>
 
-#include "MetaParser.h"
-#include "types.h"
-
 /**
  * @file Preprocess.h
  * @brief This file contains the Preprocess class definition which
@@ -42,6 +41,21 @@ namespace mediavision
 {
 namespace machine_learning
 {
+struct PreprocessConfig {
+       bool skip_csc {}; /**< It indicates whether color space conversion operation should be skipped or not. */
+       mv_colorspace_e output_format {}; /**< The pixel format of output tensor to be converted. */
+       mv_inference_data_type_e output_data_type {}; /**< The data type of output tensor to be converted. */
+       int output_channel {}; /**< The channel size of output tensor to be converted. */
+       int output_width {}; /**< The width size of output tensor to be converted. */
+       int output_height {}; /**< The height size of output tensor to be converted. */
+       bool normalize {}; /**< It indicates whether normalization to input data should be performed or not. */
+       std::vector<double> mean;
+       std::vector<double> std;
+       bool quantize {}; /**< It indicates whether quantization to input data should be performed or not. */
+       std::vector<double> scale;
+       std::vector<double> zeropoint;
+};
+
 class Preprocess
 {
 public:
@@ -49,8 +63,8 @@ public:
        {}
        ~Preprocess() = default;
 
-       //void run(std::vector<mv_source_h> &mv_srcs, MetaMap &tensorMetaInfo, IETensorBuffer &tensorBufferMap);
-       template<typename T> void run(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo, std::vector<T> &inputVector);
+       void setConfig(const PreprocessConfig &config);
+       template<typename T> void run(mv_source_h &mv_src, std::vector<T> &inputVector);
 
        std::vector<unsigned int> &getImageWidth()
        {
@@ -64,6 +78,7 @@ public:
 private:
        std::vector<unsigned int> _vImageWidth;
        std::vector<unsigned int> _vImageHeight;
+       PreprocessConfig _config;
 
        int convertToCv(int given_type, int ch);
        void colorConvert(cv::Mat &source, cv::Mat &dest, int sType, int dType);
index aab2ce25c2c4fa60b3d022cf23db0aa3dd0bf38a..d611e064844f4d2760568d488ee51e6e1c6af94f 100644 (file)
@@ -38,7 +38,6 @@ constexpr int colorConvertTable[][12] = {
 };
 
 using namespace std;
-using namespace mediavision::inference;
 using namespace mediavision::machine_learning::exception;
 
 namespace mediavision
@@ -146,7 +145,12 @@ void Preprocess::convertToCvSource(vector<mv_source_h> &mv_srcs, vector<cv::Mat>
        }
 }
 
-template<typename T> void Preprocess::run(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<T> &inputVector)
+void Preprocess::setConfig(const PreprocessConfig &config)
+{
+       _config = config;
+}
+
+template<typename T> void Preprocess::run(mv_source_h &mv_src, vector<T> &inputVector)
 {
        LOGI("ENTER");
 
@@ -157,52 +161,41 @@ template<typename T> void Preprocess::run(mv_source_h &mv_src, shared_ptr<MetaIn
        _vImageHeight.clear();
        convertToCvSource(mv_srcs, oriCvSources);
 
-       inputVector.resize(metaInfo->getHeight() * metaInfo->getWidth() * metaInfo->getChannel());
+       inputVector.resize(_config.output_height * _config.output_width * _config.output_channel);
 
-       mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID;
-       int ret = mv_source_get_colorspace(mv_srcs[0], &colorspace);
-       if (ret != MEDIA_VISION_ERROR_NONE)
-               throw InvalidOperation("Fail to get color space.");
-
-       int data_type = convertToCv(metaInfo->dataType, metaInfo->getChannel());
+       int data_type = convertToCv(_config.output_data_type, _config.output_channel);
        // dest is a wrapper of the buffer.
-       cv::Mat dest(cv::Size(metaInfo->getWidth(), metaInfo->getHeight()), data_type, inputVector.data());
+       cv::Mat dest(cv::Size(_config.output_width, _config.output_height), data_type, inputVector.data());
        cv::Mat cvSource, cvDest;
 
        // cvSource has new allocation with dest.size()
        cv::resize(oriCvSources[0], cvSource, dest.size());
 
-       // cvDest has new allocation if it's colorSpace is not RGB888
-       // cvDest share the data with cvSource it's colorSpace is RGB888
-       colorConvert(cvSource, cvDest, colorspace, metaInfo->colorSpace);
-
-       cvDest.convertTo(dest, dest.type());
-
-       try {
-               auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
-
-               if (normalization && normalization->use)
-                       normalize(dest, dest, normalization->mean, normalization->std);
-       } catch (const std::exception &e) {
-               LOGI("No normalization node.");
+       if (_config.skip_csc) {
+               cvSource.convertTo(dest, dest.type());
+       } else {
+               mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID;
+               int ret = mv_source_get_colorspace(mv_srcs[0], &colorspace);
+               if (ret != MEDIA_VISION_ERROR_NONE)
+                       throw InvalidOperation("Fail to get color space.");
+
+               // cvDest is allocated if colorspace is not RGB888, and
+               // cvDest shares the data with cvSource if the colorspace is RGB888.
+               colorConvert(cvSource, cvDest, colorspace, _config.output_format);
+               cvDest.convertTo(dest, dest.type());
        }
 
-       try {
-               auto quantization =
-                               static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
+       if (_config.normalize)
+               normalize(dest, dest, _config.mean, _config.std);
 
-               if (quantization && quantization->use)
-                       quantize(dest, dest, quantization->scale, quantization->zeropoint);
-       } catch (const std::exception &e) {
-               LOGI("No quantization node.");
-       }
+       if (_config.quantize)
+               quantize(dest, dest, _config.scale, _config.zeropoint);
 
        LOGI("LEAVE");
 }
 
-template void Preprocess::run<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<float> &inputVector);
-template void Preprocess::run<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
-                                                                                        vector<unsigned char> &inputVector);
+template void Preprocess::run<float>(mv_source_h &mv_src, vector<float> &inputVector);
+template void Preprocess::run<unsigned char>(mv_source_h &mv_src, vector<unsigned char> &inputVector);
 
 } /* machine_learning */
 } /* mediavision */
index a84b730e68b4e563363cac6da0a23b9638c4804e..2ebf37353345e6653739b983ad602bb82a04ebd8 100644 (file)
@@ -292,7 +292,30 @@ void ObjectDetection::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> metaI
 {
        LOGI("ENTER");
 
-       _preprocess.run<T>(mv_src, metaInfo, inputVector);
+       PreprocessConfig config = { false,
+                                                               metaInfo->colorSpace,
+                                                               metaInfo->dataType,
+                                                               metaInfo->getChannel(),
+                                                               metaInfo->getWidth(),
+                                                               metaInfo->getHeight() };
+
+       auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
+       if (normalization) {
+               config.normalize = normalization->use;
+               config.mean = normalization->mean;
+               config.std = normalization->std;
+       }
+
+       auto quantization =
+                       static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
+       if (quantization) {
+               config.quantize = quantization->use;
+               config.scale = quantization->scale;
+               config.zeropoint = quantization->zeropoint;
+       }
+
+       _preprocess.setConfig(config);
+       _preprocess.run<T>(mv_src, inputVector);
 
        LOGI("LEAVE");
 }
index 86e49d9b5b06ad49cad173459d7b09eea1a2ced3..4804da551493ba2789ab50804948c2572140eb52 100644 (file)
@@ -243,7 +243,30 @@ void ObjectDetection3d::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> met
 {
        LOGI("ENTER");
 
-       _preprocess.run<T>(mv_src, metaInfo, inputVector);
+       PreprocessConfig config = { false,
+                                                               metaInfo->colorSpace,
+                                                               metaInfo->dataType,
+                                                               metaInfo->getChannel(),
+                                                               metaInfo->getWidth(),
+                                                               metaInfo->getHeight() };
+
+       auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
+       if (normalization) {
+               config.normalize = normalization->use;
+               config.mean = normalization->mean;
+               config.std = normalization->std;
+       }
+
+       auto quantization =
+                       static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
+       if (quantization) {
+               config.quantize = quantization->use;
+               config.scale = quantization->scale;
+               config.zeropoint = quantization->zeropoint;
+       }
+
+       _preprocess.setConfig(config);
+       _preprocess.run<T>(mv_src, inputVector);
 
        LOGI("LEAVE");
 }