mv_machine_learning: drop Mediavision dependency from landmark detection adapters
author Inki Dae <inki.dae@samsung.com>
Mon, 25 Sep 2023 07:55:03 +0000 (16:55 +0900)
committer Kwanghoon Son <k.son@samsung.com>
Wed, 25 Oct 2023 01:54:03 +0000 (10:54 +0900)
[Issue type] : code refactoring

Drop the Mediavision dependency from the adapter classes of the landmark
detection task group. There is a use case where a pre-trained model file
is kept private, so the model-relevant code cannot be opened. In addition,
such users may want to configure the inference or training engines in
their own way.

To handle this case properly, we need to provide a plugin approach for the
behavior class. This patch is a first step toward supporting a plugin-based
behavior class that can be delivered as a separate package (see the usage
sketch below).

Change-Id: I54a3f2f8de86290718129b8a44a724d3d9a3f246
Signed-off-by: Inki Dae <inki.dae@samsung.com>
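
For reference, a minimal sketch of how an adapter is expected to drive the
narrowed LandmarkDetection interface after this patch. The method names
(configure, prepare, perform, getOutput) come from the header hunk below;
the wrapper function, the include, and the handle setup are illustrative
assumptions only, not code from this change.

// Minimal usage sketch (not part of this patch). Assumes the project
// headers declaring LandmarkDetection, LandmarkDetectionResult and
// mv_source_h are available, e.g.:
// #include "landmark_detection.h"   // hypothetical include for this sketch
#include <memory>

void runFacialLandmark(std::unique_ptr<LandmarkDetection> detector, mv_source_h src)
{
	// Meta file parsing is now folded into configure(), so callers only
	// pass the config file name instead of calling parseMetaFile() first.
	detector->configure("facial_landmark.json");
	detector->prepare();

	// perform() now resolves the input tensor data type internally
	// (UINT8 vs FLOAT32) and dispatches to the templated overload.
	detector->perform(src);

	// result() became protected; callers use getOutput() instead.
	LandmarkDetectionResult &output = detector->getOutput();
	(void)output; // consume the detected landmarks here
}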
mv_machine_learning/landmark_detection/include/landmark_detection.h
mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp
mv_machine_learning/landmark_detection/src/landmark_detection.cpp
mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp

diff --git a/mv_machine_learning/landmark_detection/include/landmark_detection.h b/mv_machine_learning/landmark_detection/include/landmark_detection.h
index 2b82356..0bba8e5 100644
@@ -41,7 +41,8 @@ private:
 
        template<typename T>
        void preprocess(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo, std::vector<T> &inputVector);
-       template<typename T> void inference(std::vector<std::vector<T> > &inputVectors);
+       std::shared_ptr<MetaInfo> getInputMetaInfo();
+       template<typename T> void perform(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo);
 
        LandmarkDetectionTaskType _task_type;
 
@@ -63,6 +64,10 @@ protected:
 
        void getOutputNames(std::vector<std::string> &names);
        void getOutputTensor(std::string target_name, std::vector<float> &tensor);
+       void parseMetaFile(const std::string &meta_file_name);
+
+       template<typename T> void inference(std::vector<std::vector<T> > &inputVectors);
+       virtual LandmarkDetectionResult &result() = 0;
 
 public:
        LandmarkDetection(LandmarkDetectionTaskType task_type);
@@ -74,12 +79,10 @@ public:
        void getEngineType(unsigned int engine_index, char **engine_type);
        void getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices);
        void getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type);
-       std::shared_ptr<MetaInfo> getInputMetaInfo();
-       void parseMetaFile(const char *meta_file_name);
-       void configure();
+       void configure(const std::string &configFile);
        void prepare();
-       template<typename T> void perform(mv_source_h &mv_src, std::shared_ptr<MetaInfo> metaInfo);
-       virtual LandmarkDetectionResult &result() = 0;
+       void perform(mv_source_h &mv_src);
+       LandmarkDetectionResult &getOutput();
 };
 
 } // machine_learning
diff --git a/mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp b/mv_machine_learning/landmark_detection/src/facial_landmark_adapter.cpp
index 90a235e..88095df 100644
@@ -90,8 +90,7 @@ void FacialLandmarkAdapter<T, V>::setEngineInfo(const char *engine_type, const c
 
 template<typename T, typename V> void FacialLandmarkAdapter<T, V>::configure()
 {
-       _landmark_detection->parseMetaFile("facial_landmark.json");
-       _landmark_detection->configure();
+       _landmark_detection->configure("facial_landmark.json");
 }
 
 template<typename T, typename V> void FacialLandmarkAdapter<T, V>::getNumberOfEngines(unsigned int *number_of_engines)
@@ -129,13 +128,7 @@ template<typename T, typename V> void FacialLandmarkAdapter<T, V>::setInput(T &t
 
 template<typename T, typename V> void FacialLandmarkAdapter<T, V>::perform()
 {
-       shared_ptr<MetaInfo> metaInfo = _landmark_detection->getInputMetaInfo();
-       if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
-               _landmark_detection->perform<unsigned char>(_source.inference_src, metaInfo);
-       else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
-               _landmark_detection->perform<float>(_source.inference_src, metaInfo);
-       else
-               throw InvalidOperation("Invalid model data type.");
+       _landmark_detection->perform(_source.inference_src);
 }
 
 template<typename T, typename V> void FacialLandmarkAdapter<T, V>::performAsync(T &t)
@@ -145,7 +138,7 @@ template<typename T, typename V> void FacialLandmarkAdapter<T, V>::performAsync(
 
 template<typename T, typename V> V &FacialLandmarkAdapter<T, V>::getOutput()
 {
-       return _landmark_detection->result();
+       return _landmark_detection->getOutput();
 }
 
 template<typename T, typename V> V &FacialLandmarkAdapter<T, V>::getOutputCache()
diff --git a/mv_machine_learning/landmark_detection/src/landmark_detection.cpp b/mv_machine_learning/landmark_detection/src/landmark_detection.cpp
index 3aa4d20..ead533f 100644
@@ -174,9 +174,9 @@ void LandmarkDetection::loadLabel()
        readFile.close();
 }
 
-void LandmarkDetection::parseMetaFile(const char *meta_file_name)
+void LandmarkDetection::parseMetaFile(const string &meta_file_name)
 {
-       _config = make_unique<EngineConfig>(string(MV_CONFIG_PATH) + string(meta_file_name));
+       _config = make_unique<EngineConfig>(string(MV_CONFIG_PATH) + meta_file_name);
 
        int ret = _config->getIntegerAttribute(string(MV_LANDMARK_DETECTION_BACKEND_TYPE), &_backendType);
        if (ret != MEDIA_VISION_ERROR_NONE)
@@ -238,8 +238,10 @@ void LandmarkDetection::parseMetaFile(const char *meta_file_name)
        loadLabel();
 }
 
-void LandmarkDetection::configure()
+void LandmarkDetection::configure(const string &configFile)
 {
+       parseMetaFile(configFile);
+
        int ret = _inference->bind(_backendType, _targetDeviceType);
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to bind a backend engine.");
@@ -333,6 +335,23 @@ template<typename T> void LandmarkDetection::perform(mv_source_h &mv_src, shared
        inference<T>(inputVectors);
 }
 
+void LandmarkDetection::perform(mv_source_h &mv_src)
+{
+       shared_ptr<MetaInfo> metaInfo = getInputMetaInfo();
+
+       if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
+               perform<unsigned char>(mv_src, metaInfo);
+       else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
+               perform<float>(mv_src, metaInfo);
+       else
+               throw InvalidOperation("Invalid model data type.");
+}
+
+LandmarkDetectionResult &LandmarkDetection::getOutput()
+{
+       return result();
+}
+
 void LandmarkDetection::getOutputNames(vector<string> &names)
 {
        TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
diff --git a/mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp b/mv_machine_learning/landmark_detection/src/pose_landmark_adapter.cpp
index f7b38d7..4816b96 100644
@@ -90,8 +90,7 @@ void PoseLandmarkAdapter<T, V>::setEngineInfo(const char *engine_type, const cha
 
 template<typename T, typename V> void PoseLandmarkAdapter<T, V>::configure()
 {
-       _landmark_detection->parseMetaFile("pose_landmark.json");
-       _landmark_detection->configure();
+       _landmark_detection->configure("pose_landmark.json");
 }
 
 template<typename T, typename V> void PoseLandmarkAdapter<T, V>::getNumberOfEngines(unsigned int *number_of_engines)
@@ -129,13 +128,7 @@ template<typename T, typename V> void PoseLandmarkAdapter<T, V>::setInput(T &t)
 
 template<typename T, typename V> void PoseLandmarkAdapter<T, V>::perform()
 {
-       shared_ptr<MetaInfo> metaInfo = _landmark_detection->getInputMetaInfo();
-       if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
-               _landmark_detection->perform<unsigned char>(_source.inference_src, metaInfo);
-       else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
-               _landmark_detection->perform<float>(_source.inference_src, metaInfo);
-       else
-               throw InvalidOperation("Invalid model data type.");
+       _landmark_detection->perform(_source.inference_src);
 }
 
 template<typename T, typename V> void PoseLandmarkAdapter<T, V>::performAsync(T &t)
@@ -145,7 +138,7 @@ template<typename T, typename V> void PoseLandmarkAdapter<T, V>::performAsync(T
 
 template<typename T, typename V> V &PoseLandmarkAdapter<T, V>::getOutput()
 {
-       return _landmark_detection->result();
+       return _landmark_detection->getOutput();
 }
 
 template<typename T, typename V> V &PoseLandmarkAdapter<T, V>::getOutputCache()