mv_machine_learning: drop backbone model code dependency
authorInki Dae <inki.dae@samsung.com>
Thu, 19 Jan 2023 01:25:49 +0000 (10:25 +0900)
committerKwanghoon Son <k.son@samsung.com>
Mon, 13 Feb 2023 02:31:48 +0000 (11:31 +0900)
[Issue type] code refactoring

Did code refactoring by dropping backbone model code dependency.

With this patch, we can replace facenet with another backbone model without
code modification. All we have to do to use a new backbone model is to update
the following "value" attributes in the two types of face_recognition.json file,
    "name"  : "FACE_RECOGNITION_INPUT_TENSOR_SHAPE",
    "type" : "array",
    "subtype" : "integer",
    "value" : [ 512, 1 ] <- input tensor shape of training model which is same
                            as output tensor shape of backbone model.

    "name"  : "FACENET_OUTPUT_TENSOR_NAME",
    "type"  : "string",
    "value" : "normalize/l2_normalize" <- output tensor name of backbone model.

Change-Id: I9e32c73e029d67a1c86e8b2c7b424cb09d614463
Signed-off-by: Inki Dae <inki.dae@samsung.com>
12 files changed:
mv_common/include/EngineConfig.h
mv_common/src/EngineConfig.cpp
mv_machine_learning/face_recognition/include/face_recognition.h
mv_machine_learning/face_recognition/include/face_recognition_type.h
mv_machine_learning/face_recognition/include/simple_shot.h
mv_machine_learning/face_recognition/meta/face_recognition.json
mv_machine_learning/face_recognition/src/face_recognition.cpp
mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp
mv_machine_learning/face_recognition/src/facenet.cpp
mv_machine_learning/face_recognition/src/simple_shot.cpp
mv_machine_learning/training/include/training_model.h
mv_machine_learning/training/src/training_model.cpp

index 9be5ed66b775dc7f93aa70d15fb1e87d64002260..29183b4bc9f345f2091ad130fb51dffc769334e2 100644 (file)
@@ -39,6 +39,7 @@ using DictIntConstIter = std::map<std::string, int>::const_iterator;
 using DictBoolConstIter = std::map<std::string, bool>::const_iterator;
 using DictStrConstIter = std::map<std::string, std::string>::const_iterator;
 using DictVecStrConstIter = std::map<std::string, std::vector<std::string> >::const_iterator;
+using DictVecIntConstIter = std::map<std::string, std::vector<int> >::const_iterator;
 
 class EngineConfig
 {
@@ -119,7 +120,7 @@ public:
         *
         * @since_tizen @if MOBILE 2.4 @else 3.0 @endif
         * @param [in]  key          The string name of the attribute
-        * @param [out] value   r    The double attribute value to be obtained
+        * @param [out] value        The double attribute value to be obtained
         * @return @c MEDIA_VISION_ERROR_NONE on success,\n
         *         otherwise a negative error value
         * @retval #MEDIA_VISION_ERROR_KEY_NOT_AVAILABLE If attribute with name @a key
@@ -140,6 +141,19 @@ public:
         */
        int getIntegerAttribute(const std::string &key, int *value) const;
 
+       /**
+        * @brief Gets integer vector attribute value by attribute name.
+        *
+        * @since_tizen 7.5
+        * @param [in]  key          The string name of the attribute
+        * @param [out] value        The vector of integer attribute values to be obtained
+        * @return @c MEDIA_VISION_ERROR_NONE on success,\n
+        *         otherwise a negative error value
+        * @retval #MEDIA_VISION_ERROR_KEY_NOT_AVAILABLE If attribute with name @a key
+        *         doesn't exist in the engine configuration dictionary
+        */
+       int getIntegerAttribute(const std::string &key, std::vector<int> *value) const;
+
        /**
         * @brief Gets boolean attribute value by attribute name.
         *
@@ -184,6 +198,7 @@ public:
        const std::map<std::string, bool> &getDefaultBoolDict();
        const std::map<std::string, std::string> &getDefaultStrDict();
        const std::map<std::string, std::vector<std::string> > &getDefaultVecStrDict();
+       const std::map<std::string, std::vector<int> > &getDefaultVecIntDict();
 
 private:
        std::map<std::string, double> __dblDict;
@@ -191,6 +206,7 @@ private:
        std::map<std::string, bool> __boolDict;
        std::map<std::string, std::string> __strDict;
        std::map<std::string, std::vector<std::string> > __vecStrDict;
+       std::map<std::string, std::vector<int> > __vecIntDict;
 
        int loadDictionaries(std::string &config_file_path);
 };
index 212be17d53e7ae59946cc0396916c3603f783dd9..19a42f1cff0c3c30016bfad24699df40a084d8b5 100644 (file)
@@ -149,6 +149,24 @@ int EngineConfig::getIntegerAttribute(const std::string &key, int *value) const
        return MEDIA_VISION_ERROR_NONE;
 }
 
+int EngineConfig::getIntegerAttribute(const std::string &key, std::vector<int> *value) const
+{
+       DictVecIntConstIter dictIter = __vecIntDict.find(key);
+       if (dictIter == __vecIntDict.end()) {
+               LOGE("Attempt to access to the unsupported vector attribute [%s] of integer "
+                        "of the engine config %p",
+                        key.c_str(), this);
+               return MEDIA_VISION_ERROR_KEY_NOT_AVAILABLE;
+       }
+
+       LOGD("Get vector attribute of integer from the engine config %p. [%s] = [%d,...]", this, dictIter->first.c_str(),
+                dictIter->second[0]);
+
+       *value = dictIter->second;
+
+       return MEDIA_VISION_ERROR_NONE;
+}
+
 int EngineConfig::getBooleanAttribute(const std::string &key, bool *value) const
 {
        DictBoolConstIter dictIter = __boolDict.find(key);
@@ -229,6 +247,11 @@ const std::map<std::string, std::vector<std::string> > &EngineConfig::getDefault
        return __vecStrDict;
 }
 
+// Read-only access to the integer-array attributes cached by this engine config.
+const std::map<std::string, std::vector<int> > &EngineConfig::getDefaultVecIntDict()
+{
+       return this->__vecIntDict;
+}
+
 int EngineConfig::loadDictionaries(std::string &config_file_path)
 {
        LOGI("Start to cache default attributes from engine configuration file.");
@@ -238,6 +261,7 @@ int EngineConfig::loadDictionaries(std::string &config_file_path)
        __boolDict.clear();
        __strDict.clear();
        __vecStrDict.clear();
+       __vecIntDict.clear();
 
        const char *conf_file = config_file_path.c_str();
        GError *error = NULL;
@@ -297,10 +321,10 @@ int EngineConfig::loadDictionaries(std::string &config_file_path)
                        continue;
                }
 
-               const char *nameStr = (const char *) json_object_get_string_member(attr_obj, "name");
+               const std::string nameStr = (const char *) json_object_get_string_member(attr_obj, "name");
                const char *typeStr = (const char *) json_object_get_string_member(attr_obj, "type");
 
-               if (NULL == nameStr || NULL == typeStr) {
+               if (nameStr.empty() || NULL == typeStr) {
                        LOGW("Attribute %i wasn't parsed from json file. name and/or "
                                 "type of the attribute are parsed as NULL.",
                                 attrInd);
@@ -308,13 +332,13 @@ int EngineConfig::loadDictionaries(std::string &config_file_path)
                }
 
                if (0 == strcmp("double", typeStr)) {
-                       __dblDict[std::string(nameStr)] = (double) json_object_get_double_member(attr_obj, "value");
+                       __dblDict[nameStr] = (double) json_object_get_double_member(attr_obj, "value");
                } else if (0 == strcmp("integer", typeStr)) {
-                       __intDict[std::string(nameStr)] = (int) json_object_get_int_member(attr_obj, "value");
+                       __intDict[nameStr] = (int) json_object_get_int_member(attr_obj, "value");
                } else if (0 == strcmp("boolean", typeStr)) {
-                       __boolDict[std::string(nameStr)] = json_object_get_boolean_member(attr_obj, "value") ? true : false;
+                       __boolDict[nameStr] = json_object_get_boolean_member(attr_obj, "value") ? true : false;
                } else if (0 == strcmp("string", typeStr)) {
-                       __strDict[std::string(nameStr)] = (char *) json_object_get_string_member(attr_obj, "value");
+                       __strDict[nameStr] = (char *) json_object_get_string_member(attr_obj, "value");
                } else if (0 == strcmp("array", typeStr)) {
                        const char *subTypeStr = (const char *) json_object_get_string_member(attr_obj, "subtype");
 
@@ -328,13 +352,24 @@ int EngineConfig::loadDictionaries(std::string &config_file_path)
                                for (unsigned int item = 0; item < json_array_get_length(attr_array); ++item) {
                                        defaultVecStr.push_back(std::string(json_array_get_string_element(attr_array, item)));
                                }
-                               __vecStrDict[std::string(nameStr)] = defaultVecStr;
+                               __vecStrDict[nameStr] = defaultVecStr;
                        }
+
+                       if (0 == strcmp("integer", subTypeStr)) {
+                               JsonArray *attr_array = json_object_get_array_member(attr_obj, "value");
+                               std::vector<int> defaultVecInt;
+
+                               for (unsigned int item = 0; item < json_array_get_length(attr_array); ++item) {
+                                       defaultVecInt.push_back(static_cast<int>(json_array_get_int_element(attr_array, item)));
+                               }
+                               __vecIntDict[nameStr] = defaultVecInt;
+                       }
+
                        //TO-DO: add other subtypes
                } else {
                        LOGW("Attribute %i:%s wasn't parsed from json file. "
                                 "Type isn't supported.",
-                                attrInd, nameStr);
+                                attrInd, nameStr.c_str());
                        continue;
                }
        }
index bcc46891e605b8c52b79cc120b2934080f32c560..f280e2ff94f5326fecb5612a6eb1a31509024c24 100644 (file)
@@ -82,6 +82,7 @@ struct FaceRecognitionConfig {
        std::string label_file_path;
        std::string feature_vector_file_path;
        double decision_threshold;
+       std::vector<size_t> input_tensor_shape;
 };
 
 class FaceRecognition
index 649bc54efa3eeead54ad2cf153d36f0a234f24ca..422040e488a0e5e803b79cb0210804cc5e8ea7e5 100644 (file)
  * @brief Defines #MV_FACENET_MODEL_FILE_PATH
  *        to set the backbone model file path.
  * @details This model file is used to extract the feature vectors from a given face image data.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
  */
 #define MV_FACENET_MODEL_FILE_PATH "FACENET_MODEL_FILE_PATH"
 
  * @brief Defines #MV_FACENET_MODEL_META_FILE_PATH
  *        to set the backbone model meta file path.
  * @details This model meta file is used to provide input and output tensor info of a given model file.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
  */
 #define MV_FACENET_MODEL_META_FILE_PATH "FACENET_MODEL_META_FILE_PATH"
 
  * @brief Defines #MV_FACE_RECOGNITION_DEFAULT_PATH
  *        to set the path where the training relevant files are created.
  * @details This path is used as a default location where the trained model, label and feature vector files are created.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
  */
 #define MV_FACE_RECOGNITION_DEFAULT_PATH "FACE_RECOGNITION_DEFAULT_PATH"
 
 /**
  * @brief Defines #MV_FACE_RECOGNITION_DECISION_THRESHOLD
  *        to set the decision threshold file+.
- * @details This file is used to determine face recognition result with a given face image data is true or false..
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
+ * @details This value is used to determine whether the face recognition result for a given face image data is true or false.
  */
 #define MV_FACE_RECOGNITION_DECISION_THRESHOLD "FACE_RECOGNITION_DECISION_THRESHOLD"
 
+/**
+ * @brief Defines #MV_FACE_RECOGNITION_INPUT_TENSOR_SHAPE
+ *        to set input tensor shape.
+ * @details This attribute describes the input tensor shape of the training model, which is the same as the output tensor shape of the backbone model.
+ */
+#define MV_FACE_RECOGNITION_INPUT_TENSOR_SHAPE "FACE_RECOGNITION_INPUT_TENSOR_SHAPE"
+
 struct facenet_input_s {
        std::vector<mv_source_h> inputs;
 };
index 3866cda3f421030b5159df86dfe8f0b198e4b013..6f9c47d450481567c33b5700bb65cb82cf29a06f 100644 (file)
@@ -30,9 +30,8 @@ private:
        void RemoveModel(const std::string file_path) override;
 
 public:
-       SimpleShot(const training_backend_type_e backend_type = TRAINING_BACKEND_NNTRAINER,
-                          const training_target_type_e target_type = TRAINING_TARGET_CPU,
-                          const std::string internal_model_file = "model_and_weights.ini");
+       SimpleShot(const training_backend_type_e backend_type, const training_target_type_e target_type,
+                          const std::vector<size_t> input_tensor_shape, const std::string internal_model_file);
        ~SimpleShot();
 
        // Configure layers for SimpleShot learning.
index 1076f4c398b157e25f78a2c65c75735112e6896f..36451aa47b45bbe17a2a3e62f59b7ac4389948a3 100644 (file)
             "type"  : "string",
             "value" : "/home/owner/media/res/face_recognition/training/"
         },
+        {
+            "name"  : "FACE_RECOGNITION_INPUT_TENSOR_SHAPE",
+            "type" : "array",
+            "subtype" : "integer",
+            "value" : [ 512, 1 ]
+        },
         {
             "name"  : "FACENET_MODEL_BACKEND_TYPE",
             "type"  : "integer",
index 6ffc4d70d44741548d7eb2dea5f7f3a8bfc8c68a..c7a9c3e107314059fd126520104dcaebea0fa915 100644 (file)
@@ -74,7 +74,7 @@ void FaceRecognition::CheckFeatureVectorFile(string fv_file_name, string new_fv_
        }
 
        // Make sure to remove a temp file in case that current process is terminated just after generating new feature vector file
-       // which is not correct file but existing one isn't removed. In this cae, existing file is used again.
+       // which is not correct file but existing one isn't removed. In this case, existing file is used again.
        if (FaceRecogUtil::IsFileExist(new_fv_file_name)) {
                int ret = ::remove(new_fv_file_name.c_str());
                if (ret)
@@ -160,7 +160,7 @@ int FaceRecognition::GetVecFromMvSource(mv_source_h img_src, std::vector<float>
 int FaceRecognition::Initialize()
 {
        _training_model = make_unique<SimpleShot>(_config.training_engine_backend_type, _config.training_target_device_type,
-                                                                                         _config.internal_model_file_path);
+                                                                                         _config.input_tensor_shape, _config.internal_model_file_path);
 
        _internal = make_unique<Inference>();
 
@@ -389,7 +389,7 @@ int FaceRecognition::DeleteLabel(string label_name)
                ImportLabel();
 
                if (_label_manager->IsExist(label_name) == false) {
-                       LOGE("%s doesn't exsit in label file.", label_name.c_str());
+                       LOGE("%s doesn't exist in label file.", label_name.c_str());
                        return MEDIA_VISION_ERROR_INVALID_OPERATION;
                }
 
index 1fe4ade284175e30f0a5166c8a88a58f78cd4e8c..65a8c5332b9f221ad73e3cb0b7724a8d25815c59 100644 (file)
@@ -57,6 +57,11 @@ template<typename T, typename V> void FaceRecognitionAdapter<T, V>::configure()
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to get default decision threshold value.");
 
+       std::vector<int> vecIntValues;
+       ret = _config->getIntegerAttribute(string(MV_FACE_RECOGNITION_INPUT_TENSOR_SHAPE), &vecIntValues);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get input tensor shape.");
+
        FaceRecognitionConfig config = { TRAINING_TARGET_CPU, // device type for training.
                                                                         TRAINING_BACKEND_NNTRAINER, // backend type for training.
                                                                         MV_INFERENCE_TARGET_DEVICE_CPU, // device type for internal model interface.
@@ -66,6 +71,9 @@ template<typename T, typename V> void FaceRecognitionAdapter<T, V>::configure()
                                                                         string(defaultPath) + "feature_vector_file.dat",
                                                                         decisionThreshold };
 
+       // The shape is read from the config as signed integers; reject negative values
+       // instead of letting them wrap around to huge dimensions when cast to size_t.
+       for (auto value : vecIntValues) {
+               if (value < 0)
+                       throw InvalidOperation("Invalid input tensor shape.");
+
+               config.input_tensor_shape.push_back(static_cast<size_t>(value));
+       }
+
        _face_recognition->SetConfig(config);
 }
 
index f8784756e56d6dbd6caefd43b56898215d65a402..9834e353c051fa1a51418c9f273fcbd2cc2367b9 100644 (file)
@@ -100,6 +100,7 @@ void Facenet::prepare()
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to load model files.");
 }
+
 void Facenet::preprocess(mv_source_h &mv_src)
 {
        LOGI("ENTER");
index 2f49344aa741c8baf9ffd9ad2ee40e8a9b2b2715..ef46f9d0287359513fe5a6c8c8ee93e89ed8313f 100644 (file)
@@ -34,13 +34,20 @@ using namespace TrainingEngineInterface::Common;
 using namespace mediavision::machine_learning::exception;
 
 SimpleShot::SimpleShot(const training_backend_type_e backend_type, const training_target_type_e target_type,
-                                          const string internal_model_file)
-               : TrainingModel(backend_type, target_type, internal_model_file)
+                                          vector<size_t> input_tensor_shape, const string internal_model_file)
+               : TrainingModel(backend_type, target_type, input_tensor_shape, internal_model_file)
 {
-       const inference_engine_tensor_info input_tensor_info = {
-               { 512, 1, 1, 1 }, INFERENCE_TENSOR_SHAPE_NCHW, INFERENCE_TENSOR_DATA_TYPE_FLOAT32, (size_t)(512 * 1 * 1 * 1)
+       // Make sure to initialize input_tesnor_info.shape with { 1, 1, 1, 1}
+       // because 4 channels are used as N C H W.
+       inference_engine_tensor_info input_tensor_info = {
+               { 1, 1, 1, 1 }, INFERENCE_TENSOR_SHAPE_NCHW, INFERENCE_TENSOR_DATA_TYPE_FLOAT32, 1
        };
 
+       for (auto idx = 0; idx < input_tensor_shape.size(); ++idx) {
+               input_tensor_info.size *= input_tensor_shape[idx];
+               input_tensor_info.shape[idx] = input_tensor_shape[idx];
+       }
+
        _engine_info.input_layer_names.push_back("preprocess_l2norm0");
        _engine_info.input_tensor_info.push_back(input_tensor_info);
 
@@ -80,13 +87,24 @@ void SimpleShot::ConfigureModel(int num_of_class)
        if (!knn)
                throw InvalidOperation("Fail to create knn layer.");
 
+       inference_engine_tensor_info input_tensor_info = _engine_info.input_tensor_info[0];
+       string input_shape_str = "input_shape=";
+
+       // NNTrainer needs reversed tensor order so make sure to reverse the tensor order.
+       reverse(input_tensor_info.shape.begin(), input_tensor_info.shape.end());
+
+       for (auto shape_idx = 0; shape_idx < input_tensor_info.shape.size(); ++shape_idx) {
+               input_shape_str += to_string(input_tensor_info.shape[shape_idx]);
+               if (shape_idx < input_tensor_info.shape.size() - 1)
+                       input_shape_str += ":";
+       }
+
        // Ps. In case of the first layer, input_shape property is mandatorily required.
-       // 1:192 is a shape of backbone model output tensor.
-       training_engine_layer_property l2norm_property = { .options = { "input_shape=1:512", "trainable=false" } };
+       training_engine_layer_property l2norm_property = { .options = { input_shape_str.c_str(), "trainable=false" } };
 
        int ret = _training->SetLayerProperty(l2norm.get(), l2norm_property);
        if (ret != TRAINING_ENGINE_ERROR_NONE)
-               throw InvalidOperation("Fail to set layer propery.");
+               throw InvalidOperation("Fail to set layer property.");
 
        const string num_class_prop = "num_class=" + to_string(num_of_class);
        training_engine_layer_property knn_property = { .options = { num_class_prop, "trainable=false" } };
index ba99ae1c5ad4edbe6d893d93c415e1de625616b5..ee49b9ee8707fe90fdfba2c7402fd3a543d34f05 100644 (file)
@@ -50,9 +50,8 @@ protected:
        std::string _internal_model_file;
 
 public:
-       TrainingModel(const training_backend_type_e backend_type = TRAINING_BACKEND_NNTRAINER,
-                                 const training_target_type_e target_type = TRAINING_TARGET_CPU,
-                                 const std::string internal_model_file = "model_and_weights.ini");
+       TrainingModel(const training_backend_type_e backend_type, const training_target_type_e target_type,
+                                 const std::vector<size_t> input_tensor_shape, const std::string internal_model_file);
        virtual ~TrainingModel();
 
        void ApplyDataSet(std::unique_ptr<DataSetManager> &data_set);
index 1432576e96eab308be5513dbc30871a3fd0cec5d..9f20fabd0fb09a085d182c3b95b81551ef347891 100644 (file)
@@ -34,7 +34,7 @@ using namespace TrainingEngineInterface::Common;
 using namespace mediavision::machine_learning::exception;
 
 TrainingModel::TrainingModel(const training_backend_type_e backend_type, const training_target_type_e target_type,
-                                                        const string internal_model_file)
+                                                        vector<size_t> input_tensor_shape, const string internal_model_file)
 {
        _internal_model_file = internal_model_file;
        _training = make_unique<TrainingEngineInterface::Common::TrainingEngineCommon>();