mv_machine_learning: drop backbone model code dependency
author Inki Dae <inki.dae@samsung.com>
Thu, 19 Jan 2023 01:25:49 +0000 (10:25 +0900)
committer Kwanghoon Son <k.son@samsung.com>
Mon, 13 Feb 2023 02:31:48 +0000 (11:31 +0900)
[Issue type] code refactoring

Refactored the code by dropping the backbone model code dependency.

With this patch, we can replace facenet with another backbone model without
code modification. All we have to do to use a new backbone model is to update
the "value" attribute of the following two entries of the face_recognition.json file,
    "name"  : "FACE_RECOGNITION_INPUT_TENSOR_SHAPE",
    "type" : "array",
    "subtype" : "integer",
    "value" : [ 512, 1 ] <- input tensor shape of training model which is same
                            as output tensor shape of backbone model.

    "name"  : "FACENET_OUTPUT_TENSOR_NAME",
    "type"  : "string",
    "value" : "normalize/l2_normalize" <- output tensor name of backbone model.

Change-Id: I9e32c73e029d67a1c86e8b2c7b424cb09d614463
Signed-off-by: Inki Dae <inki.dae@samsung.com>
12 files changed:
mv_common/include/EngineConfig.h
mv_common/src/EngineConfig.cpp
mv_machine_learning/face_recognition/include/face_recognition.h
mv_machine_learning/face_recognition/include/face_recognition_type.h
mv_machine_learning/face_recognition/include/simple_shot.h
mv_machine_learning/face_recognition/meta/face_recognition.json
mv_machine_learning/face_recognition/src/face_recognition.cpp
mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp
mv_machine_learning/face_recognition/src/facenet.cpp
mv_machine_learning/face_recognition/src/simple_shot.cpp
mv_machine_learning/training/include/training_model.h
mv_machine_learning/training/src/training_model.cpp

index 9be5ed6..29183b4 100644 (file)
@@ -39,6 +39,7 @@ using DictIntConstIter = std::map<std::string, int>::const_iterator;
 using DictBoolConstIter = std::map<std::string, bool>::const_iterator;
 using DictStrConstIter = std::map<std::string, std::string>::const_iterator;
 using DictVecStrConstIter = std::map<std::string, std::vector<std::string> >::const_iterator;
+using DictVecIntConstIter = std::map<std::string, std::vector<int> >::const_iterator;
 
 class EngineConfig
 {
@@ -119,7 +120,7 @@ public:
         *
         * @since_tizen @if MOBILE 2.4 @else 3.0 @endif
         * @param [in]  key          The string name of the attribute
-        * @param [out] value   r    The double attribute value to be obtained
+        * @param [out] value        The double attribute value to be obtained
         * @return @c MEDIA_VISION_ERROR_NONE on success,\n
         *         otherwise a negative error value
         * @retval #MEDIA_VISION_ERROR_KEY_NOT_AVAILABLE If attribute with name @a key
@@ -141,6 +142,19 @@ public:
        int getIntegerAttribute(const std::string &key, int *value) const;
 
        /**
+        * @brief Gets integer array attribute value by attribute name.
+        *
+        * @since_tizen 7.5
+        * @param [in]  key          The string name of the attribute
+        * @param [out] value        The integer vector attribute value to be obtained
+        * @return @c MEDIA_VISION_ERROR_NONE on success,\n
+        *         otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_KEY_NOT_AVAILABLE If attribute with name @a key
+        *         doesn't exist in the engine configuration dictionary
+        */
+       int getIntegerAttribute(const std::string &key, std::vector<int> *value) const;
+
+       /**
         * @brief Gets boolean attribute value by attribute name.
         *
         * @since_tizen @if MOBILE 2.4 @else 3.0 @endif
@@ -184,6 +198,7 @@ public:
        const std::map<std::string, bool> &getDefaultBoolDict();
        const std::map<std::string, std::string> &getDefaultStrDict();
        const std::map<std::string, std::vector<std::string> > &getDefaultVecStrDict();
+       const std::map<std::string, std::vector<int> > &getDefaultVecIntDict();
 
 private:
        std::map<std::string, double> __dblDict;
@@ -191,6 +206,7 @@ private:
        std::map<std::string, bool> __boolDict;
        std::map<std::string, std::string> __strDict;
        std::map<std::string, std::vector<std::string> > __vecStrDict;
+       std::map<std::string, std::vector<int> > __vecIntDict;
 
        int loadDictionaries(std::string &config_file_path);
 };
index 212be17..19a42f1 100644 (file)
@@ -149,6 +149,24 @@ int EngineConfig::getIntegerAttribute(const std::string &key, int *value) const
        return MEDIA_VISION_ERROR_NONE;
 }
 
+int EngineConfig::getIntegerAttribute(const std::string &key, std::vector<int> *value) const
+{
+       DictVecIntConstIter dictIter = __vecIntDict.find(key);
+       if (dictIter == __vecIntDict.end()) {
+               LOGE("Attempt to access to the unsupported vector attribute [%s] of integer "
+                        "of the engine config %p",
+                        key.c_str(), this);
+               return MEDIA_VISION_ERROR_KEY_NOT_AVAILABLE;
+       }
+
+       LOGD("Get vector attribute of integer from the engine config %p. [%s] = [%d,...]", this, dictIter->first.c_str(),
+                dictIter->second[0]);
+
+       *value = dictIter->second;
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
 int EngineConfig::getBooleanAttribute(const std::string &key, bool *value) const
 {
        DictBoolConstIter dictIter = __boolDict.find(key);
@@ -229,6 +247,11 @@ const std::map<std::string, std::vector<std::string> > &EngineConfig::getDefault
        return __vecStrDict;
 }
 
+// Returns a read-only reference to the dictionary of integer array attributes
+// (__vecIntDict), keyed by attribute name.
+const std::map<std::string, std::vector<int> > &EngineConfig::getDefaultVecIntDict()
+{
+       return __vecIntDict;
+}
+
 int EngineConfig::loadDictionaries(std::string &config_file_path)
 {
        LOGI("Start to cache default attributes from engine configuration file.");
@@ -238,6 +261,7 @@ int EngineConfig::loadDictionaries(std::string &config_file_path)
        __boolDict.clear();
        __strDict.clear();
        __vecStrDict.clear();
+       __vecIntDict.clear();
 
        const char *conf_file = config_file_path.c_str();
        GError *error = NULL;
@@ -297,10 +321,10 @@ int EngineConfig::loadDictionaries(std::string &config_file_path)
                        continue;
                }
 
-               const char *nameStr = (const char *) json_object_get_string_member(attr_obj, "name");
+               const std::string nameStr = (const char *) json_object_get_string_member(attr_obj, "name");
                const char *typeStr = (const char *) json_object_get_string_member(attr_obj, "type");
 
-               if (NULL == nameStr || NULL == typeStr) {
+               if (nameStr.empty() || NULL == typeStr) {
                        LOGW("Attribute %i wasn't parsed from json file. name and/or "
                                 "type of the attribute are parsed as NULL.",
                                 attrInd);
@@ -308,13 +332,13 @@ int EngineConfig::loadDictionaries(std::string &config_file_path)
                }
 
                if (0 == strcmp("double", typeStr)) {
-                       __dblDict[std::string(nameStr)] = (double) json_object_get_double_member(attr_obj, "value");
+                       __dblDict[nameStr] = (double) json_object_get_double_member(attr_obj, "value");
                } else if (0 == strcmp("integer", typeStr)) {
-                       __intDict[std::string(nameStr)] = (int) json_object_get_int_member(attr_obj, "value");
+                       __intDict[nameStr] = (int) json_object_get_int_member(attr_obj, "value");
                } else if (0 == strcmp("boolean", typeStr)) {
-                       __boolDict[std::string(nameStr)] = json_object_get_boolean_member(attr_obj, "value") ? true : false;
+                       __boolDict[nameStr] = json_object_get_boolean_member(attr_obj, "value") ? true : false;
                } else if (0 == strcmp("string", typeStr)) {
-                       __strDict[std::string(nameStr)] = (char *) json_object_get_string_member(attr_obj, "value");
+                       __strDict[nameStr] = (char *) json_object_get_string_member(attr_obj, "value");
                } else if (0 == strcmp("array", typeStr)) {
                        const char *subTypeStr = (const char *) json_object_get_string_member(attr_obj, "subtype");
 
@@ -328,13 +352,24 @@ int EngineConfig::loadDictionaries(std::string &config_file_path)
                                for (unsigned int item = 0; item < json_array_get_length(attr_array); ++item) {
                                        defaultVecStr.push_back(std::string(json_array_get_string_element(attr_array, item)));
                                }
-                               __vecStrDict[std::string(nameStr)] = defaultVecStr;
+                               __vecStrDict[nameStr] = defaultVecStr;
                        }
+
+                       if (0 == strcmp("integer", subTypeStr)) {
+                               JsonArray *attr_array = json_object_get_array_member(attr_obj, "value");
+                               std::vector<int> defaultVecInt;
+
+                               for (unsigned int item = 0; item < json_array_get_length(attr_array); ++item) {
+                                       defaultVecInt.push_back(static_cast<int>(json_array_get_int_element(attr_array, item)));
+                               }
+                               __vecIntDict[nameStr] = defaultVecInt;
+                       }
+
                        //TO-DO: add other subtypes
                } else {
                        LOGW("Attribute %i:%s wasn't parsed from json file. "
                                 "Type isn't supported.",
-                                attrInd, nameStr);
+                                attrInd, nameStr.c_str());
                        continue;
                }
        }
index bcc4689..f280e2f 100644 (file)
@@ -82,6 +82,7 @@ struct FaceRecognitionConfig {
        std::string label_file_path;
        std::string feature_vector_file_path;
        double decision_threshold;
+       std::vector<size_t> input_tensor_shape;
 };
 
 class FaceRecognition
index 649bc54..422040e 100644 (file)
  * @brief Defines #MV_FACENET_MODEL_FILE_PATH
  *        to set the backbone model file path.
  * @details This model file is used to extract the feature vectors from a given face image data.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
  */
 #define MV_FACENET_MODEL_FILE_PATH "FACENET_MODEL_FILE_PATH"
 
  * @brief Defines #MV_FACENET_MODEL_META_FILE_PATH
  *        to set the backbone model meta file path.
  * @details This model meta file is used to provide input and output tensor info of a given model file.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
  */
 #define MV_FACENET_MODEL_META_FILE_PATH "FACENET_MODEL_META_FILE_PATH"
 
  * @brief Defines #MV_FACE_RECOGNITION_DEFAULT_PATH
  *        to set the path where the training relevant files are created.
  * @details This path is used as a default location where the trained model, label and feature vector files are created.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
  */
 #define MV_FACE_RECOGNITION_DEFAULT_PATH "FACE_RECOGNITION_DEFAULT_PATH"
 
 /**
  * @brief Defines #MV_FACE_RECOGNITION_DECISION_THRESHOLD
  *        to set the decision threshold.
- * @details This file is used to determine face recognition result with a given face image data is true or false..
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
+ * @details This threshold is used to determine whether the face recognition result for given face image data is true or false.
  */
 #define MV_FACE_RECOGNITION_DECISION_THRESHOLD "FACE_RECOGNITION_DECISION_THRESHOLD"
 
+/**
+ * @brief Defines #MV_FACE_RECOGNITION_INPUT_TENSOR_SHAPE
+ *        to set input tensor shape.
+ * @details This attribute is used to describe the input tensor shape.
+ */
+#define MV_FACE_RECOGNITION_INPUT_TENSOR_SHAPE "FACE_RECOGNITION_INPUT_TENSOR_SHAPE"
+
 struct facenet_input_s {
        std::vector<mv_source_h> inputs;
 };
index 3866cda..6f9c47d 100644 (file)
@@ -30,9 +30,8 @@ private:
        void RemoveModel(const std::string file_path) override;
 
 public:
-       SimpleShot(const training_backend_type_e backend_type = TRAINING_BACKEND_NNTRAINER,
-                          const training_target_type_e target_type = TRAINING_TARGET_CPU,
-                          const std::string internal_model_file = "model_and_weights.ini");
+       SimpleShot(const training_backend_type_e backend_type, const training_target_type_e target_type,
+                          const std::vector<size_t> input_tensor_shape, const std::string internal_model_file);
        ~SimpleShot();
 
        // Configure layers for SimpleShot learning.
index 1076f4c..36451aa 100644 (file)
             "value" : "/home/owner/media/res/face_recognition/training/"
         },
         {
+            "name"  : "FACE_RECOGNITION_INPUT_TENSOR_SHAPE",
+            "type" : "array",
+            "subtype" : "integer",
+            "value" : [ 512, 1 ]
+        },
+        {
             "name"  : "FACENET_MODEL_BACKEND_TYPE",
             "type"  : "integer",
             "value" : 1
index 6ffc4d7..c7a9c3e 100644 (file)
@@ -74,7 +74,7 @@ void FaceRecognition::CheckFeatureVectorFile(string fv_file_name, string new_fv_
        }
 
        // Make sure to remove a temp file in case that current process is terminated just after generating new feature vector file
-       // which is not correct file but existing one isn't removed. In this cae, existing file is used again.
+       // which is not correct file but existing one isn't removed. In this case, existing file is used again.
        if (FaceRecogUtil::IsFileExist(new_fv_file_name)) {
                int ret = ::remove(new_fv_file_name.c_str());
                if (ret)
@@ -160,7 +160,7 @@ int FaceRecognition::GetVecFromMvSource(mv_source_h img_src, std::vector<float>
 int FaceRecognition::Initialize()
 {
        _training_model = make_unique<SimpleShot>(_config.training_engine_backend_type, _config.training_target_device_type,
-                                                                                         _config.internal_model_file_path);
+                                                                                         _config.input_tensor_shape, _config.internal_model_file_path);
 
        _internal = make_unique<Inference>();
 
@@ -389,7 +389,7 @@ int FaceRecognition::DeleteLabel(string label_name)
                ImportLabel();
 
                if (_label_manager->IsExist(label_name) == false) {
-                       LOGE("%s doesn't exsit in label file.", label_name.c_str());
+                       LOGE("%s doesn't exist in label file.", label_name.c_str());
                        return MEDIA_VISION_ERROR_INVALID_OPERATION;
                }
 
index 1fe4ade..65a8c53 100644 (file)
@@ -57,6 +57,11 @@ template<typename T, typename V> void FaceRecognitionAdapter<T, V>::configure()
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to get default decision threshold value.");
 
+       std::vector<int> vecIntValues;
+       ret = _config->getIntegerAttribute(string(MV_FACE_RECOGNITION_INPUT_TENSOR_SHAPE), &vecIntValues);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get input tensor shape.");
+
        FaceRecognitionConfig config = { TRAINING_TARGET_CPU, // device type for training.
                                                                         TRAINING_BACKEND_NNTRAINER, // backend type for training.
                                                                         MV_INFERENCE_TARGET_DEVICE_CPU, // device type for internal model interface.
@@ -66,6 +71,9 @@ template<typename T, typename V> void FaceRecognitionAdapter<T, V>::configure()
                                                                         string(defaultPath) + "feature_vector_file.dat",
                                                                         decisionThreshold };
 
+       for (auto value : vecIntValues)
+               config.input_tensor_shape.push_back(static_cast<size_t>(value));
+
        _face_recognition->SetConfig(config);
 }
 
index f878475..9834e35 100644 (file)
@@ -100,6 +100,7 @@ void Facenet::prepare()
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to load model files.");
 }
+
 void Facenet::preprocess(mv_source_h &mv_src)
 {
        LOGI("ENTER");
index 2f49344..ef46f9d 100644 (file)
@@ -34,13 +34,20 @@ using namespace TrainingEngineInterface::Common;
 using namespace mediavision::machine_learning::exception;
 
 SimpleShot::SimpleShot(const training_backend_type_e backend_type, const training_target_type_e target_type,
-                                          const string internal_model_file)
-               : TrainingModel(backend_type, target_type, internal_model_file)
+                                          vector<size_t> input_tensor_shape, const string internal_model_file)
+               : TrainingModel(backend_type, target_type, input_tensor_shape, internal_model_file)
 {
-       const inference_engine_tensor_info input_tensor_info = {
-               { 512, 1, 1, 1 }, INFERENCE_TENSOR_SHAPE_NCHW, INFERENCE_TENSOR_DATA_TYPE_FLOAT32, (size_t)(512 * 1 * 1 * 1)
+       // Make sure to initialize input_tensor_info.shape with { 1, 1, 1, 1 }
+       // because 4 dimensions are used as N, C, H, W.
+       inference_engine_tensor_info input_tensor_info = {
+               { 1, 1, 1, 1 }, INFERENCE_TENSOR_SHAPE_NCHW, INFERENCE_TENSOR_DATA_TYPE_FLOAT32, 1
        };
 
+       for (auto idx = 0; idx < input_tensor_shape.size(); ++idx) {
+               input_tensor_info.size *= input_tensor_shape[idx];
+               input_tensor_info.shape[idx] = input_tensor_shape[idx];
+       }
+
        _engine_info.input_layer_names.push_back("preprocess_l2norm0");
        _engine_info.input_tensor_info.push_back(input_tensor_info);
 
@@ -80,13 +87,24 @@ void SimpleShot::ConfigureModel(int num_of_class)
        if (!knn)
                throw InvalidOperation("Fail to create knn layer.");
 
+       inference_engine_tensor_info input_tensor_info = _engine_info.input_tensor_info[0];
+       string input_shape_str = "input_shape=";
+
+       // NNTrainer needs reversed tensor order so make sure to reverse the tensor order.
+       reverse(input_tensor_info.shape.begin(), input_tensor_info.shape.end());
+
+       for (auto shape_idx = 0; shape_idx < input_tensor_info.shape.size(); ++shape_idx) {
+               input_shape_str += to_string(input_tensor_info.shape[shape_idx]);
+               if (shape_idx < input_tensor_info.shape.size() - 1)
+                       input_shape_str += ":";
+       }
+
        // Ps. In case of the first layer, input_shape property is mandatorily required.
-       // 1:192 is a shape of backbone model output tensor.
-       training_engine_layer_property l2norm_property = { .options = { "input_shape=1:512", "trainable=false" } };
+       training_engine_layer_property l2norm_property = { .options = { input_shape_str.c_str(), "trainable=false" } };
 
        int ret = _training->SetLayerProperty(l2norm.get(), l2norm_property);
        if (ret != TRAINING_ENGINE_ERROR_NONE)
-               throw InvalidOperation("Fail to set layer propery.");
+               throw InvalidOperation("Fail to set layer property.");
 
        const string num_class_prop = "num_class=" + to_string(num_of_class);
        training_engine_layer_property knn_property = { .options = { num_class_prop, "trainable=false" } };
index ba99ae1..ee49b9e 100644 (file)
@@ -50,9 +50,8 @@ protected:
        std::string _internal_model_file;
 
 public:
-       TrainingModel(const training_backend_type_e backend_type = TRAINING_BACKEND_NNTRAINER,
-                                 const training_target_type_e target_type = TRAINING_TARGET_CPU,
-                                 const std::string internal_model_file = "model_and_weights.ini");
+       TrainingModel(const training_backend_type_e backend_type, const training_target_type_e target_type,
+                                 const std::vector<size_t> input_tensor_shape, const std::string internal_model_file);
        virtual ~TrainingModel();
 
        void ApplyDataSet(std::unique_ptr<DataSetManager> &data_set);
index 1432576..9f20fab 100644 (file)
@@ -34,7 +34,7 @@ using namespace TrainingEngineInterface::Common;
 using namespace mediavision::machine_learning::exception;
 
 TrainingModel::TrainingModel(const training_backend_type_e backend_type, const training_target_type_e target_type,
-                                                        const string internal_model_file)
+                                                        vector<size_t> input_tensor_shape, const string internal_model_file)
 {
        _internal_model_file = internal_model_file;
        _training = make_unique<TrainingEngineInterface::Common::TrainingEngineCommon>();