mv_machine_learning: code refactoring to Face Recognition

author Inki Dae <inki.dae@samsung.com>

Fri, 13 May 2022 07:39:40 +0000 (16:39 +0900)

committer Inki Dae <inki.dae@samsung.com>

Wed, 20 Jul 2022 05:16:40 +0000 (14:16 +0900)
author Inki Dae <inki.dae@samsung.com>
Fri, 13 May 2022 07:39:40 +0000 (16:39 +0900)
committer Inki Dae <inki.dae@samsung.com>
Wed, 20 Jul 2022 05:16:40 +0000 (14:16 +0900)
diff --git a/mv_machine_learning/face_recognition/include/face_recognition.h b/mv_machine_learning/face_recognition/include/face_recognition.h

index 7e31354..8c5af0c 100644 (file)
--- a/mv_machine_learning/face_recognition/include/face_recognition.h
+++ b/mv_machine_learning/face_recognition/include/face_recognition.h
@@ -31,9 +31,12 @@
  #include "data_augment_rotate.h"
  
  typedef struct {
-       std::string backbone_backend_name;
-       inference_target_type_e backbone_target_device_type;
+       training_target_type_e training_target_device_type;
         training_engine_backend_type_e training_engine_backend_type;
+       inference_target_type_e inference_target_device_type;
+       inference_backend_type_e inference_engine_backend_type;
+       inference_target_type_e backbone_target_device_type;
+       inference_backend_type_e backbone_engine_backend_type;
         std::string backbone_model_file_path;
         std::string internal_model_file_path;
         std::string label_file_path;
@@ -47,7 +50,7 @@ private:
         std::vector<std::unique_ptr<DataAugment>> _data_augments;
  
         // FYI. This function should be called every time a new face is registered.
-       int Prepare();
+       void ImportLabel();
         void CheckFeatureVectorFile(std::unique_ptr<FeatureVectorManager>& old_fvm, std::unique_ptr<FeatureVectorManager>& new_fvm);
         std::unique_ptr<DataSetManager> CreateDSM(const training_engine_backend_type_e backend_type);
         std::unique_ptr<FeatureVectorManager> CreateFVM(const training_engine_backend_type_e backend_type, std::string file_name);
@@ -59,7 +62,6 @@ private:
  
  protected:
         bool _initialized;
-       bool _prepared;
         std::unique_ptr<InferenceEngineHelper> _internal;
         std::unique_ptr<InferenceEngineHelper> _backbone;
         std::unique_ptr<FaceNetInfo> _face_net_info;
diff --git a/mv_machine_learning/face_recognition/include/simple_shot.h b/mv_machine_learning/face_recognition/include/simple_shot.h

index 8a60f6e..f00f644 100644 (file)
--- a/mv_machine_learning/face_recognition/include/simple_shot.h
+++ b/mv_machine_learning/face_recognition/include/simple_shot.h
@@ -25,6 +25,7 @@ private:
  
  public:
         SimpleShot(const training_engine_backend_type_e backend_type = TRAINING_ENGINE_BACKEND_NNTRAINER,
+                          const training_target_type_e target_type = TRAINING_TARGET_CPU,
                            const std::string internal_model_file = "model_and_weights.ini");
         ~SimpleShot();
  
diff --git a/mv_machine_learning/face_recognition/src/face_recognition.cpp b/mv_machine_learning/face_recognition/src/face_recognition.cpp

index a46dff6..1021946 100644 (file)
--- a/mv_machine_learning/face_recognition/src/face_recognition.cpp
+++ b/mv_machine_learning/face_recognition/src/face_recognition.cpp
@@ -38,7 +38,7 @@ using namespace TrainingEngineInterface::Common;
  using namespace Mediavision::MachineLearning::Exception;
  
  FaceRecognition::FaceRecognition() :
-               _initialized(false), _prepared(false), _internal(), _backbone(), _face_net_info(), _training_model(), _label_manager()
+               _initialized(false), _internal(), _backbone(), _face_net_info(), _training_model(), _label_manager()
  {
         _data_augments.push_back(std::make_unique<DataAugmentDefault>());
         /* Add other data argument classes. */
@@ -208,7 +208,8 @@ int FaceRecognition::Initialize()
         }
  
         // Initialize inference engine object for backbone model.
-       _backbone = make_unique<InferenceEngineHelper>(_config.backbone_backend_name, _config.backbone_target_device_type);
+       _backbone = make_unique<InferenceEngineHelper>(_config.backbone_engine_backend_type,
+                                                                                                  _config.backbone_target_device_type);
  
         vector<string> input_layer_names, output_layer_names;
         vector<inference_engine_tensor_info> input_tensor_info, output_tensor_info;
@@ -228,43 +229,33 @@ int FaceRecognition::Initialize()
  
         _backbone->Load(_face_net_info->GetModelFilePath());
  
+       _training_model = make_unique<SimpleShot>(_config.training_engine_backend_type,
+                                                                                               _config.training_target_device_type,
+                                                                                               _config.internal_model_file_path);
+
+       _internal = make_unique<InferenceEngineHelper>(_config.inference_engine_backend_type,
+                                                                                                       _config.inference_target_device_type);
+
         _initialized = true;
  
         return MEDIA_VISION_ERROR_NONE;
  }
  
-int FaceRecognition::Prepare()
+void FaceRecognition::ImportLabel()
  {
-       if (!_initialized) {
-               LOGE("Initialization not ready yet.");
-               return MEDIA_VISION_ERROR_INVALID_OPERATION;
-       }
-
         try {
                 // Prepare can be called several times after initialization is done so previous data should be dropped.
-               _training_model.reset();
                 _label_manager.reset();
-               _internal.reset();
-
-               _training_model = make_unique<SimpleShot>(_config.training_engine_backend_type, _config.internal_model_file_path);
                 _label_manager = make_unique<LabelManager>(_config.label_file_path, _config.decision_threshold);
  
                 // Update label manager from a given label file.
                 int cnt = _label_manager->ImportLabel();
  
                 LOGD("%d labels have been imported", cnt);
-
-               TrainingEngineBackendInfo engine_info = _training_model->GetTrainingEngineInfo();
-
-               _internal = make_unique<InferenceEngineHelper>(engine_info.backend_name, engine_info.target_device);
         } catch (const BaseException& e) {
                 LOGE("%s", e.what());
-               return e.getError();
+               throw e;
         }
-
-       _prepared = true;
-
-       return MEDIA_VISION_ERROR_NONE;
  }
  
  int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
@@ -273,6 +264,11 @@ int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
         vector<model_layer_info>& output_layer_info = _face_net_info->GetOutputLayerInfo();
         vector<float> in_vec;
  
+       if (!_initialized) {
+               LOGE("Initialization not ready yet.");
+               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+       }
+
         int ret = GetVecFromMvSource(img_src, in_vec);
         if (ret != MEDIA_VISION_ERROR_NONE) {
                 LOGE("Fail to get tensor vector from mv_source.(%d)", ret);
@@ -280,13 +276,10 @@ int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
         }
  
         for (auto& data_augment : _data_augments) {
-               ret = Prepare();
-               if (ret != MEDIA_VISION_ERROR_NONE) {
-                       LOGE("Fail to prepare face recognition.");
-                       return ret;
-               }
-
                 try {
+                       // Import label data from a label file.
+                       ImportLabel();
+
                         // 1. Store only label names to label file, which aren't duplicated.
                         bool duplicated  = _label_manager->AddLabelToMap(label_name, label_name);
                         if (!duplicated) {
@@ -333,6 +326,10 @@ int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
                         _training_model->ApplyDataSet(data_set);
                         _training_model->Compile();
                         _training_model->Train();
+
+                       // label_cnt can change every time training is performed, and in that case the whole data set is used for
+                       // training again. So make sure to clear the previous data set before the next training.
+                       _training_model->ClearDataSet(data_set);
                 } catch (const BaseException& e) {
                         LOGE("%s", e.what());
                         return e.getError();
@@ -383,10 +380,9 @@ int FaceRecognition::GetAnswer(vector<float>& result_tensor, unsigned int *out_i
  
  int FaceRecognition::RecognizeFace(mv_source_h img_src, vector<float>& out_vec, unsigned int *out_idx)
  {
-       int ret = Prepare();
-       if (ret != MEDIA_VISION_ERROR_NONE) {
-               LOGE("Fail to prepare face recognition.");
-               return ret;
+       if (!_initialized) {
+               LOGE("Initialization not ready yet.");
+               return MEDIA_VISION_ERROR_INVALID_OPERATION;
         }
  
         TrainingEngineBackendInfo engine_info = _training_model->GetTrainingEngineInfo();
@@ -396,13 +392,16 @@ int FaceRecognition::RecognizeFace(mv_source_h img_src, vector<float>& out_vec,
         vector<inference_engine_tensor_info>& output_tensor_info = engine_info.output_tensor_info;
         vector<float> in_vec;
  
-       ret = GetVecFromMvSource(img_src, in_vec);
+       int ret = GetVecFromMvSource(img_src, in_vec);
         if (ret != MEDIA_VISION_ERROR_NONE) {
                 LOGE("Fail to get tensor vector from mv_source.(%d)", ret);
                 return ret;
         }
  
         try {
+               // Import label data from a label file.
+               ImportLabel();
+
                 if (_face_net_info->GetInputLayerInfo().empty() || _face_net_info->GetInputLayerInfo().size() > 1) {
                         LOGE("Invalid input layer size - input layer size should be 1.");
                         return MEDIA_VISION_ERROR_INVALID_PARAMETER;
diff --git a/mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp b/mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp

index 127b008..80299ae 100644 (file)
--- a/mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp
+++ b/mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp
@@ -111,9 +111,12 @@ int mv_face_recognition_prepare_open(mv_face_recognition_h handle)
         }
  
         FaceRecognitionConfig config = {
-               "tflite",
-               INFERENCE_TARGET_CPU,
+               TRAINING_TARGET_CPU,
                 TRAINING_ENGINE_BACKEND_NNTRAINER,
+               INFERENCE_TARGET_CPU,
+               INFERENCE_BACKEND_NNTRAINER,
+               INFERENCE_TARGET_CPU,
+               INFERENCE_BACKEND_TFLITE,
                 backboneModelFilePath,
                 string(defaultPath) + "model_and_weights.ini",
                 string(defaultPath) + "labels.dat",
diff --git a/mv_machine_learning/face_recognition/src/simple_shot.cpp b/mv_machine_learning/face_recognition/src/simple_shot.cpp

index bc470d3..0ac10f9 100644 (file)
--- a/mv_machine_learning/face_recognition/src/simple_shot.cpp
+++ b/mv_machine_learning/face_recognition/src/simple_shot.cpp
@@ -32,15 +32,16 @@ using namespace std;
  using namespace TrainingEngineInterface::Common;
  using namespace Mediavision::MachineLearning::Exception;
  
-SimpleShot::SimpleShot(const training_engine_backend_type_e backend_type, const string internal_model_file) :
-                                               TrainingModel(backend_type, internal_model_file)
+SimpleShot::SimpleShot(const training_engine_backend_type_e backend_type,
+                                          const training_target_type_e target_type,
+                                          const string internal_model_file) :
+                                               TrainingModel(backend_type, target_type, internal_model_file)
  {
-       map<int, string>::iterator item = _backend_lookup.find(backend_type);
-       if (item == _backend_lookup.end())
-               throw InvalidParameter("Invalid training engine backend type.");
-
-       _engine_info.backend_name = item->second;
-       _engine_info.target_device = INFERENCE_TARGET_CPU;
+       _engine_info.backend_type = backend_type;
+       // TODO. The training engine interface has no target type attribute yet.
+       //       Add a target type to training_engine_config and consider passing this type
+       //       to the given training engine backend.
+       _engine_info.target_device = target_type;
  
         const inference_engine_tensor_info nntrainer_input_tensor_info = {
                 { 192, 1, 1, 1 },
@@ -78,15 +79,7 @@ SimpleShot::~SimpleShot()
  
  void SimpleShot::ConfigureModel(int num_of_class)
  {
-       training_engine_config config = { _engine_info.backend_name };
-       int ret = _training->BindBackend(config);
-       if (ret != TRAINING_ENGINE_ERROR_NONE)
-               throw InvalidOperation("Fail to bind backend engine.");
-
-       training_engine_capacity capacity = { TRAINING_TENSOR_SHAPE_MIN };
-       ret = _training->GetBackendCapacity(capacity);
-       if (ret != TRAINING_ENGINE_ERROR_NONE)
-               throw InvalidOperation("Fail to get backend capacity.");
+       // TODO. Check the capacity.
  
         _model = _training->CreateModel();
         if (!_model)
@@ -103,7 +96,8 @@ void SimpleShot::ConfigureModel(int num_of_class)
         // Ps. In case of the first layer, input_shape property is mandatorily required.
         // 1:192 is a shape of backbone model output tensor.
         training_engine_layer_property l2norm_property = { .options = { "input_shape=1:192", "trainable=false" } };
-       ret = _training->SetLayerProperty(l2norm.get(), l2norm_property);
+
+       int ret = _training->SetLayerProperty(l2norm.get(), l2norm_property);
         if (ret != TRAINING_ENGINE_ERROR_NONE)
                 throw InvalidOperation("Fail to set layer propery.");
  
diff --git a/mv_machine_learning/inference/include/inference_engine_helper.h b/mv_machine_learning/inference/include/inference_engine_helper.h

index 1a8e1a4..8745a72 100644 (file)
--- a/mv_machine_learning/inference/include/inference_engine_helper.h
+++ b/mv_machine_learning/inference/include/inference_engine_helper.h
@@ -38,7 +38,7 @@ private:
         void CleanupTensorBuffers(IETensorBuffer &inputs, IETensorBuffer &outputs);
  
  public:
-       InferenceEngineHelper(std::string backend_name, int target_device);
+       InferenceEngineHelper(int backend_type, int target_device);
         ~InferenceEngineHelper();
         int Load(std::string backbone_path);
         int UpdateLayerInfo(const std::vector<std::string>& input_layers,
diff --git a/mv_machine_learning/inference/src/inference_engine_helper.cpp b/mv_machine_learning/inference/src/inference_engine_helper.cpp

index 7b735eb..405b512 100644 (file)
--- a/mv_machine_learning/inference/src/inference_engine_helper.cpp
+++ b/mv_machine_learning/inference/src/inference_engine_helper.cpp
@@ -28,13 +28,13 @@ static map<string, int> Model_Formats = {
         { "onnx", INFERENCE_MODEL_ONNX }, {"ini", INFERENCE_MODEL_NNTRAINER }
  };
  
-InferenceEngineHelper::InferenceEngineHelper(string backend_name, int target_device)
+InferenceEngineHelper::InferenceEngineHelper(int backend_type, int target_device)
  {
         _engine.reset();
  
         _engine = make_unique<InferenceEngineCommon>();
  
-       inference_engine_config config = { backend_name, -1, target_device };
+       inference_engine_config config = { "", backend_type, target_device };
  
         int ret = _engine->BindBackend(&config);
         if (ret != INFERENCE_ENGINE_ERROR_NONE)
diff --git a/mv_machine_learning/training/include/training_model.h b/mv_machine_learning/training/include/training_model.h

index 552e610..95f44d1 100644 (file)
--- a/mv_machine_learning/training/include/training_model.h
+++ b/mv_machine_learning/training/include/training_model.h
@@ -27,7 +27,7 @@
  #include "feature_vector_manager.h"
  
  typedef struct {
-       std::string backend_name;
+       int backend_type;
         unsigned int target_device;
         std::vector<std::string> input_layer_names;
         std::vector<std::string> output_layer_names;
@@ -42,15 +42,16 @@ protected:
         std::unique_ptr<TrainingEngineInterface::Common::TrainingEngineCommon> _training;
         std::unique_ptr<training_engine_model> _model;
         std::unique_ptr<training_engine_dataset> _data_set;
-       std::map<int, std::string> _backend_lookup;
         std::string _internal_model_file;
  
  public:
         TrainingModel(const training_engine_backend_type_e backend_type = TRAINING_ENGINE_BACKEND_NNTRAINER,
+                                 const training_target_type_e target_type = TRAINING_TARGET_CPU,
                                   const std::string internal_model_file = "model_and_weights.ini");
         virtual ~TrainingModel();
  
         void ApplyDataSet(std::unique_ptr<DataSetManager>& data_set);
+       void ClearDataSet(std::unique_ptr<DataSetManager>& data_set);
         void Compile();
         void Train();
  
diff --git a/mv_machine_learning/training/src/training_model.cpp b/mv_machine_learning/training/src/training_model.cpp

index bea433b..f8b40bb 100644 (file)
--- a/mv_machine_learning/training/src/training_model.cpp
+++ b/mv_machine_learning/training/src/training_model.cpp
@@ -33,15 +33,25 @@ using namespace std;
  using namespace TrainingEngineInterface::Common;
  using namespace Mediavision::MachineLearning::Exception;
  
-TrainingModel::TrainingModel(const training_engine_backend_type_e backend_type, const string internal_model_file)
+TrainingModel::TrainingModel(const training_engine_backend_type_e backend_type,
+                                                        const training_target_type_e target_type,
+                                                        const string internal_model_file)
  {
         _internal_model_file = internal_model_file;
-
-       _backend_lookup.insert(make_pair<int, string>(TRAINING_ENGINE_BACKEND_NNTRAINER, "nntrainer"));
-
         _training = make_unique<TrainingEngineInterface::Common::TrainingEngineCommon>();
  
+       // TODO. Use backend type instead of backend name. The backend type and target are hard-coded for now; set them from backend_type and target_type later.
+       training_engine_config config = { "", TRAINING_BACKEND_NNTRAINER, TRAINING_TARGET_CPU };
+       int ret = _training->BindBackend(&config);
+       if (ret != TRAINING_ENGINE_ERROR_NONE)
+               throw InvalidOperation("Fail to bind backend engine.");
+
+       training_engine_capacity capacity = { TRAINING_TENSOR_SHAPE_MIN };
+       ret = _training->GetBackendCapacity(capacity);
+       if (ret != TRAINING_ENGINE_ERROR_NONE)
+               throw InvalidOperation("Fail to get backend capacity.");
  }
+
  TrainingModel::~ TrainingModel()
  {
         if (_training)
@@ -74,6 +84,12 @@ void TrainingModel::ApplyDataSet(unique_ptr<DataSetManager>& data_set)
                 throw InvalidOperation("Fail to set dataset to model.", ret);
  }
  
+void TrainingModel::ClearDataSet(unique_ptr<DataSetManager>& data_set)
+{
+       data_set->Clear();
+       _training->DestroyDataset(_data_set.get());
+}
+
  void TrainingModel::Compile()
  {
         auto optimizer = _training->CreateOptimizer(TRAINING_OPTIMIZER_TYPE_SGD);
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec

index e89c1d6..9fb61cb 100644 (file)
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -1,6 +1,6 @@
  Name:        capi-media-vision
  Summary:     Media Vision library for Tizen Native API
-Version:     0.21.3
+Version:     0.21.4
  Release:     0
  Group:       Multimedia/Framework
  License:     Apache-2.0 and BSD-3-Clause
author	Inki Dae <inki.dae@samsung.com>
	Fri, 13 May 2022 07:39:40 +0000 (16:39 +0900)
committer	Inki Dae <inki.dae@samsung.com>
	Wed, 20 Jul 2022 05:16:40 +0000 (14:16 +0900)
mv_machine_learning/face_recognition/include/face_recognition.h		patch \| blob \| history
mv_machine_learning/face_recognition/include/simple_shot.h		patch \| blob \| history
mv_machine_learning/face_recognition/src/face_recognition.cpp		patch \| blob \| history
mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp		patch \| blob \| history
mv_machine_learning/face_recognition/src/simple_shot.cpp		patch \| blob \| history
mv_machine_learning/inference/include/inference_engine_helper.h		patch \| blob \| history
mv_machine_learning/inference/src/inference_engine_helper.cpp		patch \| blob \| history
mv_machine_learning/training/include/training_model.h		patch \| blob \| history
mv_machine_learning/training/src/training_model.cpp		patch \| blob \| history
packaging/capi-media-vision.spec		patch \| blob \| history