mv_machine_learning: code refactoring to Face Recognition
author: Inki Dae <inki.dae@samsung.com>
Fri, 13 May 2022 07:39:40 +0000 (16:39 +0900)
committer: Inki Dae <inki.dae@samsung.com>
Wed, 20 Jul 2022 05:16:40 +0000 (14:16 +0900)
[Version] : 0.21.4-0
[Issue type] : code refactoring

Refactored the Face Recognition Framework as follows:
- Make the top module of the Face Recognition Framework pass
  backend engine information such as backend type and target device type.
- Move the binding code of the training engine from the RegisterNewFace
  and RecognizeFace functions to the Initialize function. By doing this,
  it improves initial launching time because engine binding is done
  only once.
- Introduce the ClearDataSet API, which is an internal API of the
  TrainingModel class, to drop all previous dataset data.
- Rename the Prepare function to ImportLabel because this function
  now only imports the label data from a file into memory.

This is just a step toward the next code refactoring.

Change-Id: If8b4021793e62c290ce59362ed605215c2bf5e3e
Signed-off-by: Inki Dae <inki.dae@samsung.com>
mv_machine_learning/face_recognition/include/face_recognition.h
mv_machine_learning/face_recognition/include/simple_shot.h
mv_machine_learning/face_recognition/src/face_recognition.cpp
mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp
mv_machine_learning/face_recognition/src/simple_shot.cpp
mv_machine_learning/inference/include/inference_engine_helper.h
mv_machine_learning/inference/src/inference_engine_helper.cpp
mv_machine_learning/training/include/training_model.h
mv_machine_learning/training/src/training_model.cpp
packaging/capi-media-vision.spec

index 7e31354..8c5af0c 100644 (file)
 #include "data_augment_rotate.h"
 
 typedef struct {
-       std::string backbone_backend_name;
-       inference_target_type_e backbone_target_device_type;
+       training_target_type_e training_target_device_type;
        training_engine_backend_type_e training_engine_backend_type;
+       inference_target_type_e inference_target_device_type;
+       inference_backend_type_e inference_engine_backend_type;
+       inference_target_type_e backbone_target_device_type;
+       inference_backend_type_e backbone_engine_backend_type;
        std::string backbone_model_file_path;
        std::string internal_model_file_path;
        std::string label_file_path;
@@ -47,7 +50,7 @@ private:
        std::vector<std::unique_ptr<DataAugment>> _data_augments;
 
        // FYI. This function should be called every time a new face is registered.
-       int Prepare();
+       void ImportLabel();
        void CheckFeatureVectorFile(std::unique_ptr<FeatureVectorManager>& old_fvm, std::unique_ptr<FeatureVectorManager>& new_fvm);
        std::unique_ptr<DataSetManager> CreateDSM(const training_engine_backend_type_e backend_type);
        std::unique_ptr<FeatureVectorManager> CreateFVM(const training_engine_backend_type_e backend_type, std::string file_name);
@@ -59,7 +62,6 @@ private:
 
 protected:
        bool _initialized;
-       bool _prepared;
        std::unique_ptr<InferenceEngineHelper> _internal;
        std::unique_ptr<InferenceEngineHelper> _backbone;
        std::unique_ptr<FaceNetInfo> _face_net_info;
index 8a60f6e..f00f644 100644 (file)
@@ -25,6 +25,7 @@ private:
 
 public:
        SimpleShot(const training_engine_backend_type_e backend_type = TRAINING_ENGINE_BACKEND_NNTRAINER,
+                          const training_target_type_e target_type = TRAINING_TARGET_CPU,
                           const std::string internal_model_file = "model_and_weights.ini");
        ~SimpleShot();
 
index a46dff6..1021946 100644 (file)
@@ -38,7 +38,7 @@ using namespace TrainingEngineInterface::Common;
 using namespace Mediavision::MachineLearning::Exception;
 
 FaceRecognition::FaceRecognition() :
-               _initialized(false), _prepared(false), _internal(), _backbone(), _face_net_info(), _training_model(), _label_manager()
+               _initialized(false), _internal(), _backbone(), _face_net_info(), _training_model(), _label_manager()
 {
        _data_augments.push_back(std::make_unique<DataAugmentDefault>());
        /* Add other data argument classes. */
@@ -208,7 +208,8 @@ int FaceRecognition::Initialize()
        }
 
        // Initialize inference engine object for backbone model.
-       _backbone = make_unique<InferenceEngineHelper>(_config.backbone_backend_name, _config.backbone_target_device_type);
+       _backbone = make_unique<InferenceEngineHelper>(_config.backbone_engine_backend_type,
+                                                                                                  _config.backbone_target_device_type);
 
        vector<string> input_layer_names, output_layer_names;
        vector<inference_engine_tensor_info> input_tensor_info, output_tensor_info;
@@ -228,43 +229,33 @@ int FaceRecognition::Initialize()
 
        _backbone->Load(_face_net_info->GetModelFilePath());
 
+       _training_model = make_unique<SimpleShot>(_config.training_engine_backend_type,
+                                                                                               _config.training_target_device_type,
+                                                                                               _config.internal_model_file_path);
+
+       _internal = make_unique<InferenceEngineHelper>(_config.inference_engine_backend_type,
+                                                                                                       _config.inference_target_device_type);
+
        _initialized = true;
 
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int FaceRecognition::Prepare()
+void FaceRecognition::ImportLabel()
 {
-       if (!_initialized) {
-               LOGE("Initialization not ready yet.");
-               return MEDIA_VISION_ERROR_INVALID_OPERATION;
-       }
-
        try {
                // Prepare can be called several times after initialization is done so previous data should be dropped.
-               _training_model.reset();
                _label_manager.reset();
-               _internal.reset();
-
-               _training_model = make_unique<SimpleShot>(_config.training_engine_backend_type, _config.internal_model_file_path);
                _label_manager = make_unique<LabelManager>(_config.label_file_path, _config.decision_threshold);
 
                // Update label manager from a given label file.
                int cnt = _label_manager->ImportLabel();
 
                LOGD("%d labels have been imported", cnt);
-
-               TrainingEngineBackendInfo engine_info = _training_model->GetTrainingEngineInfo();
-
-               _internal = make_unique<InferenceEngineHelper>(engine_info.backend_name, engine_info.target_device);
        } catch (const BaseException& e) {
                LOGE("%s", e.what());
-               return e.getError();
+               throw e;
        }
-
-       _prepared = true;
-
-       return MEDIA_VISION_ERROR_NONE;
 }
 
 int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
@@ -273,6 +264,11 @@ int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
        vector<model_layer_info>& output_layer_info = _face_net_info->GetOutputLayerInfo();
        vector<float> in_vec;
 
+       if (!_initialized) {
+               LOGE("Initialization not ready yet.");
+               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+       }
+
        int ret = GetVecFromMvSource(img_src, in_vec);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to get tensor vector from mv_source.(%d)", ret);
@@ -280,13 +276,10 @@ int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
        }
 
        for (auto& data_augment : _data_augments) {
-               ret = Prepare();
-               if (ret != MEDIA_VISION_ERROR_NONE) {
-                       LOGE("Fail to prepare face recognition.");
-                       return ret;
-               }
-
                try {
+                       // Import label data from a label file.
+                       ImportLabel();
+
                        // 1. Store only label names to label file, which aren't duplicated.
                        bool duplicated  = _label_manager->AddLabelToMap(label_name, label_name);
                        if (!duplicated) {
@@ -333,6 +326,10 @@ int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
                        _training_model->ApplyDataSet(data_set);
                        _training_model->Compile();
                        _training_model->Train();
+
+                       // label_cnt can be changed every time the training is performed and all data set will be used for the training
+                       // again in this case. So make sure to clear previous data set before next training.
+                       _training_model->ClearDataSet(data_set);
                } catch (const BaseException& e) {
                        LOGE("%s", e.what());
                        return e.getError();
@@ -383,10 +380,9 @@ int FaceRecognition::GetAnswer(vector<float>& result_tensor, unsigned int *out_i
 
 int FaceRecognition::RecognizeFace(mv_source_h img_src, vector<float>& out_vec, unsigned int *out_idx)
 {
-       int ret = Prepare();
-       if (ret != MEDIA_VISION_ERROR_NONE) {
-               LOGE("Fail to prepare face recognition.");
-               return ret;
+       if (!_initialized) {
+               LOGE("Initialization not ready yet.");
+               return MEDIA_VISION_ERROR_INVALID_OPERATION;
        }
 
        TrainingEngineBackendInfo engine_info = _training_model->GetTrainingEngineInfo();
@@ -396,13 +392,16 @@ int FaceRecognition::RecognizeFace(mv_source_h img_src, vector<float>& out_vec,
        vector<inference_engine_tensor_info>& output_tensor_info = engine_info.output_tensor_info;
        vector<float> in_vec;
 
-       ret = GetVecFromMvSource(img_src, in_vec);
+       int ret = GetVecFromMvSource(img_src, in_vec);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to get tensor vector from mv_source.(%d)", ret);
                return ret;
        }
 
        try {
+               // Import label data from a label file.
+               ImportLabel();
+
                if (_face_net_info->GetInputLayerInfo().empty() || _face_net_info->GetInputLayerInfo().size() > 1) {
                        LOGE("Invalid input layer size - input layer size should be 1.");
                        return MEDIA_VISION_ERROR_INVALID_PARAMETER;
index 127b008..80299ae 100644 (file)
@@ -111,9 +111,12 @@ int mv_face_recognition_prepare_open(mv_face_recognition_h handle)
        }
 
        FaceRecognitionConfig config = {
-               "tflite",
-               INFERENCE_TARGET_CPU,
+               TRAINING_TARGET_CPU,
                TRAINING_ENGINE_BACKEND_NNTRAINER,
+               INFERENCE_TARGET_CPU,
+               INFERENCE_BACKEND_NNTRAINER,
+               INFERENCE_TARGET_CPU,
+               INFERENCE_BACKEND_TFLITE,
                backboneModelFilePath,
                string(defaultPath) + "model_and_weights.ini",
                string(defaultPath) + "labels.dat",
index bc470d3..0ac10f9 100644 (file)
@@ -32,15 +32,16 @@ using namespace std;
 using namespace TrainingEngineInterface::Common;
 using namespace Mediavision::MachineLearning::Exception;
 
-SimpleShot::SimpleShot(const training_engine_backend_type_e backend_type, const string internal_model_file) :
-                                               TrainingModel(backend_type, internal_model_file)
+SimpleShot::SimpleShot(const training_engine_backend_type_e backend_type,
+                                          const training_target_type_e target_type,
+                                          const string internal_model_file) :
+                                               TrainingModel(backend_type, target_type, internal_model_file)
 {
-       map<int, string>::iterator item = _backend_lookup.find(backend_type);
-       if (item == _backend_lookup.end())
-               throw InvalidParameter("Invalid training engine backend type.");
-
-       _engine_info.backend_name = item->second;
-       _engine_info.target_device = INFERENCE_TARGET_CPU;
+       _engine_info.backend_type = backend_type;
+       // TODO. training engine interface has no target type attribute yet.
+       //       Add target type to training_engine_config and consider setting this type
+       //       to a given training engine backend.
+       _engine_info.target_device = target_type;
 
        const inference_engine_tensor_info nntrainer_input_tensor_info = {
                { 192, 1, 1, 1 },
@@ -78,15 +79,7 @@ SimpleShot::~SimpleShot()
 
 void SimpleShot::ConfigureModel(int num_of_class)
 {
-       training_engine_config config = { _engine_info.backend_name };
-       int ret = _training->BindBackend(config);
-       if (ret != TRAINING_ENGINE_ERROR_NONE)
-               throw InvalidOperation("Fail to bind backend engine.");
-
-       training_engine_capacity capacity = { TRAINING_TENSOR_SHAPE_MIN };
-       ret = _training->GetBackendCapacity(capacity);
-       if (ret != TRAINING_ENGINE_ERROR_NONE)
-               throw InvalidOperation("Fail to get backend capacity.");
+       // TODO. Check the capacity.
 
        _model = _training->CreateModel();
        if (!_model)
@@ -103,7 +96,8 @@ void SimpleShot::ConfigureModel(int num_of_class)
        // Ps. In case of the first layer, input_shape property is mandatorily required.
        // 1:192 is a shape of backbone model output tensor.
        training_engine_layer_property l2norm_property = { .options = { "input_shape=1:192", "trainable=false" } };
-       ret = _training->SetLayerProperty(l2norm.get(), l2norm_property);
+
+       int ret = _training->SetLayerProperty(l2norm.get(), l2norm_property);
        if (ret != TRAINING_ENGINE_ERROR_NONE)
                throw InvalidOperation("Fail to set layer propery.");
 
index 1a8e1a4..8745a72 100644 (file)
@@ -38,7 +38,7 @@ private:
        void CleanupTensorBuffers(IETensorBuffer &inputs, IETensorBuffer &outputs);
 
 public:
-       InferenceEngineHelper(std::string backend_name, int target_device);
+       InferenceEngineHelper(int backend_type, int target_device);
        ~InferenceEngineHelper();
        int Load(std::string backbone_path);
        int UpdateLayerInfo(const std::vector<std::string>& input_layers,
index 7b735eb..405b512 100644 (file)
@@ -28,13 +28,13 @@ static map<string, int> Model_Formats = {
        { "onnx", INFERENCE_MODEL_ONNX }, {"ini", INFERENCE_MODEL_NNTRAINER }
 };
 
-InferenceEngineHelper::InferenceEngineHelper(string backend_name, int target_device)
+InferenceEngineHelper::InferenceEngineHelper(int backend_type, int target_device)
 {
        _engine.reset();
 
        _engine = make_unique<InferenceEngineCommon>();
 
-       inference_engine_config config = { backend_name, -1, target_device };
+       inference_engine_config config = { "", backend_type, target_device };
 
        int ret = _engine->BindBackend(&config);
        if (ret != INFERENCE_ENGINE_ERROR_NONE)
index 552e610..95f44d1 100644 (file)
@@ -27,7 +27,7 @@
 #include "feature_vector_manager.h"
 
 typedef struct {
-       std::string backend_name;
+       int backend_type;
        unsigned int target_device;
        std::vector<std::string> input_layer_names;
        std::vector<std::string> output_layer_names;
@@ -42,15 +42,16 @@ protected:
        std::unique_ptr<TrainingEngineInterface::Common::TrainingEngineCommon> _training;
        std::unique_ptr<training_engine_model> _model;
        std::unique_ptr<training_engine_dataset> _data_set;
-       std::map<int, std::string> _backend_lookup;
        std::string _internal_model_file;
 
 public:
        TrainingModel(const training_engine_backend_type_e backend_type = TRAINING_ENGINE_BACKEND_NNTRAINER,
+                                 const training_target_type_e target_type = TRAINING_TARGET_CPU,
                                  const std::string internal_model_file = "model_and_weights.ini");
        virtual ~TrainingModel();
 
        void ApplyDataSet(std::unique_ptr<DataSetManager>& data_set);
+       void ClearDataSet(std::unique_ptr<DataSetManager>& data_set);
        void Compile();
        void Train();
 
index bea433b..f8b40bb 100644 (file)
@@ -33,15 +33,25 @@ using namespace std;
 using namespace TrainingEngineInterface::Common;
 using namespace Mediavision::MachineLearning::Exception;
 
-TrainingModel::TrainingModel(const training_engine_backend_type_e backend_type, const string internal_model_file)
+TrainingModel::TrainingModel(const training_engine_backend_type_e backend_type,
+                                                        const training_target_type_e target_type,
+                                                        const string internal_model_file)
 {
        _internal_model_file = internal_model_file;
-
-       _backend_lookup.insert(make_pair<int, string>(TRAINING_ENGINE_BACKEND_NNTRAINER, "nntrainer"));
-
        _training = make_unique<TrainingEngineInterface::Common::TrainingEngineCommon>();
 
+       // TODO. Use backend type instead of backend name, and then set the backend type later.
+       training_engine_config config = { "", TRAINING_BACKEND_NNTRAINER, TRAINING_TARGET_CPU };
+       int ret = _training->BindBackend(&config);
+       if (ret != TRAINING_ENGINE_ERROR_NONE)
+               throw InvalidOperation("Fail to bind backend engine.");
+
+       training_engine_capacity capacity = { TRAINING_TENSOR_SHAPE_MIN };
+       ret = _training->GetBackendCapacity(capacity);
+       if (ret != TRAINING_ENGINE_ERROR_NONE)
+               throw InvalidOperation("Fail to get backend capacity.");
 }
+
 TrainingModel::~ TrainingModel()
 {
        if (_training)
@@ -74,6 +84,12 @@ void TrainingModel::ApplyDataSet(unique_ptr<DataSetManager>& data_set)
                throw InvalidOperation("Fail to set dataset to model.", ret);
 }
 
+void TrainingModel::ClearDataSet(unique_ptr<DataSetManager>& data_set)
+{
+       data_set->Clear();
+       _training->DestroyDataset(_data_set.get());
+}
+
 void TrainingModel::Compile()
 {
        auto optimizer = _training->CreateOptimizer(TRAINING_OPTIMIZER_TYPE_SGD);
index e89c1d6..9fb61cb 100644 (file)
@@ -1,6 +1,6 @@
 Name:        capi-media-vision
 Summary:     Media Vision library for Tizen Native API
-Version:     0.21.3
+Version:     0.21.4
 Release:     0
 Group:       Multimedia/Framework
 License:     Apache-2.0 and BSD-3-Clause