From: Inki Dae <inki.dae@samsung.com>
Date: Thu, 19 Jan 2023 01:25:49 +0000 (+0900)
Subject: mv_machine_learning: drop backbone model code dependency
X-Git-Tag: accepted/tizen/unified/20230220.174431~8
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d3767875a124f792cec21e3124c4d9ca9a0a405c;p=platform%2Fcore%2Fapi%2Fmediavision.git

mv_machine_learning: drop backbone model code dependency

[Issue type] code refactoring

Did code refactoring by dropping backbone model code dependency.

With this patch, we can replace facenet with another backbone model without
code modification. All we have to do to use a new backbone model is to update
each "value" attribute below in the two types of face_recognition.json file,
    "name"  : "FACE_RECOGNITION_INPUT_TENSOR_SHAPE",
    "type" : "array",
    "subtype" : "integer",
    "value" : [ 512, 1 ] <- input tensor shape of training model which is same
                            as output tensor shape of backbone model.

    "name"  : "FACENET_OUTPUT_TENSOR_NAME",
    "type"  : "string",
    "value" : "normalize/l2_normalize" <- output tensor name of backbone model.

Change-Id: I9e32c73e029d67a1c86e8b2c7b424cb09d614463
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---

diff --git a/mv_common/include/EngineConfig.h b/mv_common/include/EngineConfig.h
index 9be5ed66..29183b4b 100644
--- a/mv_common/include/EngineConfig.h
+++ b/mv_common/include/EngineConfig.h
@@ -39,6 +39,7 @@ using DictIntConstIter = std::map<std::string, int>::const_iterator;
 using DictBoolConstIter = std::map<std::string, bool>::const_iterator;
 using DictStrConstIter = std::map<std::string, std::string>::const_iterator;
 using DictVecStrConstIter = std::map<std::string, std::vector<std::string> >::const_iterator;
+using DictVecIntConstIter = std::map<std::string, std::vector<int> >::const_iterator;
 
 class EngineConfig
 {
@@ -119,7 +120,7 @@ public:
 	 *
 	 * @since_tizen @if MOBILE 2.4 @else 3.0 @endif
 	 * @param [in]  key          The string name of the attribute
-	 * @param [out] value   r    The double attribute value to be obtained
+	 * @param [out] value        The double attribute value to be obtained
 	 * @return @c MEDIA_VISION_ERROR_NONE on success,\n
 	 *         otherwise a negative error value
 	 * @retval #MEDIA_VISION_ERROR_KEY_NOT_AVAILABLE If attribute with name @a key
@@ -140,6 +141,19 @@ public:
 	 */
 	int getIntegerAttribute(const std::string &key, int *value) const;
 
+	/**
+	 * @brief Gets integer vector attribute value by attribute name.
+	 *
+	 * @since_tizen 7.5
+	 * @param [in]  key          The string name of the attribute
+	 * @param [out] value        The integer vector attribute value to be obtained
+	 * @return @c MEDIA_VISION_ERROR_NONE on success,\n
+	 *         otherwise a negative error value
+	 * @retval #MEDIA_VISION_ERROR_KEY_NOT_AVAILABLE If attribute with name @a key
+	 *         doesn't exist in the engine configuration dictionary
+	 */
+	int getIntegerAttribute(const std::string &key, std::vector<int> *value) const;
+
 	/**
 	 * @brief Gets boolean attribute value by attribute name.
 	 *
@@ -184,6 +198,7 @@ public:
 	const std::map<std::string, bool> &getDefaultBoolDict();
 	const std::map<std::string, std::string> &getDefaultStrDict();
 	const std::map<std::string, std::vector<std::string> > &getDefaultVecStrDict();
+	const std::map<std::string, std::vector<int> > &getDefaultVecIntDict();
 
 private:
 	std::map<std::string, double> __dblDict;
@@ -191,6 +206,7 @@ private:
 	std::map<std::string, bool> __boolDict;
 	std::map<std::string, std::string> __strDict;
 	std::map<std::string, std::vector<std::string> > __vecStrDict;
+	std::map<std::string, std::vector<int> > __vecIntDict;
 
 	int loadDictionaries(std::string &config_file_path);
 };
diff --git a/mv_common/src/EngineConfig.cpp b/mv_common/src/EngineConfig.cpp
index 212be17d..19a42f1c 100644
--- a/mv_common/src/EngineConfig.cpp
+++ b/mv_common/src/EngineConfig.cpp
@@ -149,6 +149,24 @@ int EngineConfig::getIntegerAttribute(const std::string &key, int *value) const
 	return MEDIA_VISION_ERROR_NONE;
 }
 
+int EngineConfig::getIntegerAttribute(const std::string &key, std::vector<int> *value) const
+{
+	// Look the key up in the dictionary of integer-array attributes.
+	DictVecIntConstIter dictIter = __vecIntDict.find(key);
+	if (dictIter == __vecIntDict.end()) {
+		LOGE("Attempt to access to the unsupported vector attribute [%s] of integer "
+			 "of the engine config %p",
+			 key.c_str(), this);
+		return MEDIA_VISION_ERROR_KEY_NOT_AVAILABLE;
+	}
+
+	// The stored vector may be empty (e.g. an empty JSON array), so guard the element read used for logging.
+	LOGD("Get vector attribute of integer from the engine config %p. [%s] = [%d,...]", this, dictIter->first.c_str(),
+		 dictIter->second.empty() ? 0 : dictIter->second[0]);
+	*value = dictIter->second;
+	return MEDIA_VISION_ERROR_NONE;
+}
+
 int EngineConfig::getBooleanAttribute(const std::string &key, bool *value) const
 {
 	DictBoolConstIter dictIter = __boolDict.find(key);
@@ -229,6 +247,11 @@ const std::map<std::string, std::vector<std::string> > &EngineConfig::getDefault
 	return __vecStrDict;
 }
 
+const std::map<std::string, std::vector<int> > &EngineConfig::getDefaultVecIntDict()
+{
+	return __vecIntDict; // read-only view of the integer-array attributes cached by loadDictionaries()
+}
+
 int EngineConfig::loadDictionaries(std::string &config_file_path)
 {
 	LOGI("Start to cache default attributes from engine configuration file.");
@@ -238,6 +261,7 @@ int EngineConfig::loadDictionaries(std::string &config_file_path)
 	__boolDict.clear();
 	__strDict.clear();
 	__vecStrDict.clear();
+	__vecIntDict.clear();
 
 	const char *conf_file = config_file_path.c_str();
 	GError *error = NULL;
@@ -297,10 +321,10 @@ int EngineConfig::loadDictionaries(std::string &config_file_path)
 			continue;
 		}
 
-		const char *nameStr = (const char *) json_object_get_string_member(attr_obj, "name");
+		const std::string nameStr = (const char *) json_object_get_string_member(attr_obj, "name");
 		const char *typeStr = (const char *) json_object_get_string_member(attr_obj, "type");
 
-		if (NULL == nameStr || NULL == typeStr) {
+		if (nameStr.empty() || NULL == typeStr) {
 			LOGW("Attribute %i wasn't parsed from json file. name and/or "
 				 "type of the attribute are parsed as NULL.",
 				 attrInd);
@@ -308,13 +332,13 @@ int EngineConfig::loadDictionaries(std::string &config_file_path)
 		}
 
 		if (0 == strcmp("double", typeStr)) {
-			__dblDict[std::string(nameStr)] = (double) json_object_get_double_member(attr_obj, "value");
+			__dblDict[nameStr] = (double) json_object_get_double_member(attr_obj, "value");
 		} else if (0 == strcmp("integer", typeStr)) {
-			__intDict[std::string(nameStr)] = (int) json_object_get_int_member(attr_obj, "value");
+			__intDict[nameStr] = (int) json_object_get_int_member(attr_obj, "value");
 		} else if (0 == strcmp("boolean", typeStr)) {
-			__boolDict[std::string(nameStr)] = json_object_get_boolean_member(attr_obj, "value") ? true : false;
+			__boolDict[nameStr] = json_object_get_boolean_member(attr_obj, "value") ? true : false;
 		} else if (0 == strcmp("string", typeStr)) {
-			__strDict[std::string(nameStr)] = (char *) json_object_get_string_member(attr_obj, "value");
+			__strDict[nameStr] = (char *) json_object_get_string_member(attr_obj, "value");
 		} else if (0 == strcmp("array", typeStr)) {
 			const char *subTypeStr = (const char *) json_object_get_string_member(attr_obj, "subtype");
 
@@ -328,13 +352,24 @@ int EngineConfig::loadDictionaries(std::string &config_file_path)
 				for (unsigned int item = 0; item < json_array_get_length(attr_array); ++item) {
 					defaultVecStr.push_back(std::string(json_array_get_string_element(attr_array, item)));
 				}
-				__vecStrDict[std::string(nameStr)] = defaultVecStr;
+				__vecStrDict[nameStr] = defaultVecStr;
 			}
+
+			if (0 == strcmp("integer", subTypeStr)) {
+				JsonArray *attr_array = json_object_get_array_member(attr_obj, "value");
+				std::vector<int> defaultVecInt;
+
+				for (unsigned int item = 0; item < json_array_get_length(attr_array); ++item) {
+					defaultVecInt.push_back(static_cast<int>(json_array_get_int_element(attr_array, item)));
+				}
+				__vecIntDict[nameStr] = defaultVecInt;
+			}
+
 			//TO-DO: add other subtypes
 		} else {
 			LOGW("Attribute %i:%s wasn't parsed from json file. "
 				 "Type isn't supported.",
-				 attrInd, nameStr);
+				 attrInd, nameStr.c_str());
 			continue;
 		}
 	}
diff --git a/mv_machine_learning/face_recognition/include/face_recognition.h b/mv_machine_learning/face_recognition/include/face_recognition.h
index bcc46891..f280e2ff 100644
--- a/mv_machine_learning/face_recognition/include/face_recognition.h
+++ b/mv_machine_learning/face_recognition/include/face_recognition.h
@@ -82,6 +82,7 @@ struct FaceRecognitionConfig {
 	std::string label_file_path;
 	std::string feature_vector_file_path;
 	double decision_threshold;
+	std::vector<size_t> input_tensor_shape;
 };
 
 class FaceRecognition
diff --git a/mv_machine_learning/face_recognition/include/face_recognition_type.h b/mv_machine_learning/face_recognition/include/face_recognition_type.h
index 649bc54e..422040e4 100644
--- a/mv_machine_learning/face_recognition/include/face_recognition_type.h
+++ b/mv_machine_learning/face_recognition/include/face_recognition_type.h
@@ -23,10 +23,6 @@
  * @brief Defines #MV_FACENET_MODEL_FILE_PATH
  *        to set the backbone model file path.
  * @details This model file is used to extract the feature vectors from a given face image data.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
  */
 #define MV_FACENET_MODEL_FILE_PATH "FACENET_MODEL_FILE_PATH"
 
@@ -34,10 +30,6 @@
  * @brief Defines #MV_FACENET_MODEL_META_FILE_PATH
  *        to set the backbone model meta file path.
  * @details This model meta file is used to provide input and output tensor info of a given model file.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
  */
 #define MV_FACENET_MODEL_META_FILE_PATH "FACENET_MODEL_META_FILE_PATH"
 
@@ -47,10 +39,6 @@
  * @brief Defines #MV_FACE_RECOGNITION_DEFAULT_PATH
  *        to set the path where the training relevant files are created.
  * @details This path is used as a default location where the trained model, label and feature vector files are created.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
  */
 #define MV_FACE_RECOGNITION_DEFAULT_PATH "FACE_RECOGNITION_DEFAULT_PATH"
 
@@ -63,14 +51,17 @@
 /**
  * @brief Defines #MV_FACE_RECOGNITION_DECISION_THRESHOLD
  *        to set the decision threshold file+.
- * @details This file is used to determine face recognition result with a given face image data is true or false..
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
+ * @details This threshold is used to determine whether the face recognition result for a given face image data is true or false.
  */
 #define MV_FACE_RECOGNITION_DECISION_THRESHOLD "FACE_RECOGNITION_DECISION_THRESHOLD"
 
+/**
+ * @brief Defines #MV_FACE_RECOGNITION_INPUT_TENSOR_SHAPE
+ *        to set input tensor shape.
+ * @details This attribute describes the input tensor shape of the training model, which is the same as the output tensor shape of the backbone model.
+ */
+#define MV_FACE_RECOGNITION_INPUT_TENSOR_SHAPE "FACE_RECOGNITION_INPUT_TENSOR_SHAPE"
+
 struct facenet_input_s {
 	std::vector<mv_source_h> inputs;
 };
diff --git a/mv_machine_learning/face_recognition/include/simple_shot.h b/mv_machine_learning/face_recognition/include/simple_shot.h
index 3866cda3..6f9c47d4 100644
--- a/mv_machine_learning/face_recognition/include/simple_shot.h
+++ b/mv_machine_learning/face_recognition/include/simple_shot.h
@@ -30,9 +30,8 @@ private:
 	void RemoveModel(const std::string file_path) override;
 
 public:
-	SimpleShot(const training_backend_type_e backend_type = TRAINING_BACKEND_NNTRAINER,
-			   const training_target_type_e target_type = TRAINING_TARGET_CPU,
-			   const std::string internal_model_file = "model_and_weights.ini");
+	SimpleShot(const training_backend_type_e backend_type, const training_target_type_e target_type,
+			   const std::vector<size_t> input_tensor_shape, const std::string internal_model_file);
 	~SimpleShot();
 
 	// Configure layers for SimpleShot learning.
diff --git a/mv_machine_learning/face_recognition/meta/face_recognition.json b/mv_machine_learning/face_recognition/meta/face_recognition.json
index 1076f4c3..36451aa4 100644
--- a/mv_machine_learning/face_recognition/meta/face_recognition.json
+++ b/mv_machine_learning/face_recognition/meta/face_recognition.json
@@ -16,6 +16,12 @@
             "type"  : "string",
             "value" : "/home/owner/media/res/face_recognition/training/"
         },
+        {
+            "name"  : "FACE_RECOGNITION_INPUT_TENSOR_SHAPE",
+            "type" : "array",
+            "subtype" : "integer",
+            "value" : [ 512, 1 ]
+        },
         {
             "name"  : "FACENET_MODEL_BACKEND_TYPE",
             "type"  : "integer",
diff --git a/mv_machine_learning/face_recognition/src/face_recognition.cpp b/mv_machine_learning/face_recognition/src/face_recognition.cpp
index 6ffc4d70..c7a9c3e1 100644
--- a/mv_machine_learning/face_recognition/src/face_recognition.cpp
+++ b/mv_machine_learning/face_recognition/src/face_recognition.cpp
@@ -74,7 +74,7 @@ void FaceRecognition::CheckFeatureVectorFile(string fv_file_name, string new_fv_
 	}
 
 	// Make sure to remove a temp file in case that current process is terminated just after generating new feature vector file
-	// which is not correct file but existing one isn't removed. In this cae, existing file is used again.
+	// which is not correct file but existing one isn't removed. In this case, existing file is used again.
 	if (FaceRecogUtil::IsFileExist(new_fv_file_name)) {
 		int ret = ::remove(new_fv_file_name.c_str());
 		if (ret)
@@ -160,7 +160,7 @@ int FaceRecognition::GetVecFromMvSource(mv_source_h img_src, std::vector<float>
 int FaceRecognition::Initialize()
 {
 	_training_model = make_unique<SimpleShot>(_config.training_engine_backend_type, _config.training_target_device_type,
-											  _config.internal_model_file_path);
+											  _config.input_tensor_shape, _config.internal_model_file_path);
 
 	_internal = make_unique<Inference>();
 
@@ -389,7 +389,7 @@ int FaceRecognition::DeleteLabel(string label_name)
 		ImportLabel();
 
 		if (_label_manager->IsExist(label_name) == false) {
-			LOGE("%s doesn't exsit in label file.", label_name.c_str());
+			LOGE("%s doesn't exist in label file.", label_name.c_str());
 			return MEDIA_VISION_ERROR_INVALID_OPERATION;
 		}
 
diff --git a/mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp b/mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp
index 1fe4ade2..65a8c533 100644
--- a/mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp
+++ b/mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp
@@ -57,6 +57,11 @@ template<typename T, typename V> void FaceRecognitionAdapter<T, V>::configure()
 	if (ret != MEDIA_VISION_ERROR_NONE)
 		throw InvalidOperation("Fail to get default decision threshold value.");
 
+	std::vector<int> vecIntValues;
+	ret = _config->getIntegerAttribute(string(MV_FACE_RECOGNITION_INPUT_TENSOR_SHAPE), &vecIntValues);
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw InvalidOperation("Fail to get input tensor shape.");
+
 	FaceRecognitionConfig config = { TRAINING_TARGET_CPU, // device type for training.
 									 TRAINING_BACKEND_NNTRAINER, // backend type for training.
 									 MV_INFERENCE_TARGET_DEVICE_CPU, // device type for internal model interface.
@@ -66,6 +71,9 @@ template<typename T, typename V> void FaceRecognitionAdapter<T, V>::configure()
 									 string(defaultPath) + "feature_vector_file.dat",
 									 decisionThreshold };
 
+	for (auto value : vecIntValues)
+		config.input_tensor_shape.push_back(static_cast<size_t>(value));
+
 	_face_recognition->SetConfig(config);
 }
 
diff --git a/mv_machine_learning/face_recognition/src/facenet.cpp b/mv_machine_learning/face_recognition/src/facenet.cpp
index f8784756..9834e353 100644
--- a/mv_machine_learning/face_recognition/src/facenet.cpp
+++ b/mv_machine_learning/face_recognition/src/facenet.cpp
@@ -100,6 +100,7 @@ void Facenet::prepare()
 	if (ret != MEDIA_VISION_ERROR_NONE)
 		throw InvalidOperation("Fail to load model files.");
 }
+
 void Facenet::preprocess(mv_source_h &mv_src)
 {
 	LOGI("ENTER");
diff --git a/mv_machine_learning/face_recognition/src/simple_shot.cpp b/mv_machine_learning/face_recognition/src/simple_shot.cpp
index 2f49344a..ef46f9d0 100644
--- a/mv_machine_learning/face_recognition/src/simple_shot.cpp
+++ b/mv_machine_learning/face_recognition/src/simple_shot.cpp
@@ -34,13 +34,20 @@ using namespace TrainingEngineInterface::Common;
 using namespace mediavision::machine_learning::exception;
 
 SimpleShot::SimpleShot(const training_backend_type_e backend_type, const training_target_type_e target_type,
-					   const string internal_model_file)
-		: TrainingModel(backend_type, target_type, internal_model_file)
+					   vector<size_t> input_tensor_shape, const string internal_model_file)
+		: TrainingModel(backend_type, target_type, input_tensor_shape, internal_model_file)
 {
-	const inference_engine_tensor_info input_tensor_info = {
-		{ 512, 1, 1, 1 }, INFERENCE_TENSOR_SHAPE_NCHW, INFERENCE_TENSOR_DATA_TYPE_FLOAT32, (size_t)(512 * 1 * 1 * 1)
+	// Make sure to initialize input_tensor_info.shape with { 1, 1, 1, 1 }
+	// because 4 dimensions are used as N C H W.
+	inference_engine_tensor_info input_tensor_info = {
+		{ 1, 1, 1, 1 }, INFERENCE_TENSOR_SHAPE_NCHW, INFERENCE_TENSOR_DATA_TYPE_FLOAT32, 1
 	};
 
+	for (auto idx = 0; idx < input_tensor_shape.size(); ++idx) {
+		input_tensor_info.size *= input_tensor_shape[idx];
+		input_tensor_info.shape[idx] = input_tensor_shape[idx];
+	}
+
 	_engine_info.input_layer_names.push_back("preprocess_l2norm0");
 	_engine_info.input_tensor_info.push_back(input_tensor_info);
 
@@ -80,13 +87,24 @@ void SimpleShot::ConfigureModel(int num_of_class)
 	if (!knn)
 		throw InvalidOperation("Fail to create knn layer.");
 
+	inference_engine_tensor_info input_tensor_info = _engine_info.input_tensor_info[0];
+	string input_shape_str = "input_shape=";
+
+	// NNTrainer needs reversed tensor order so make sure to reverse the tensor order.
+	reverse(input_tensor_info.shape.begin(), input_tensor_info.shape.end());
+
+	for (auto shape_idx = 0; shape_idx < input_tensor_info.shape.size(); ++shape_idx) {
+		input_shape_str += to_string(input_tensor_info.shape[shape_idx]);
+		if (shape_idx < input_tensor_info.shape.size() - 1)
+			input_shape_str += ":";
+	}
+
 	// Ps. In case of the first layer, input_shape property is mandatorily required.
-	// 1:192 is a shape of backbone model output tensor.
-	training_engine_layer_property l2norm_property = { .options = { "input_shape=1:512", "trainable=false" } };
+	training_engine_layer_property l2norm_property = { .options = { input_shape_str.c_str(), "trainable=false" } };
 
 	int ret = _training->SetLayerProperty(l2norm.get(), l2norm_property);
 	if (ret != TRAINING_ENGINE_ERROR_NONE)
-		throw InvalidOperation("Fail to set layer propery.");
+		throw InvalidOperation("Fail to set layer property.");
 
 	const string num_class_prop = "num_class=" + to_string(num_of_class);
 	training_engine_layer_property knn_property = { .options = { num_class_prop, "trainable=false" } };
diff --git a/mv_machine_learning/training/include/training_model.h b/mv_machine_learning/training/include/training_model.h
index ba99ae1c..ee49b9ee 100644
--- a/mv_machine_learning/training/include/training_model.h
+++ b/mv_machine_learning/training/include/training_model.h
@@ -50,9 +50,8 @@ protected:
 	std::string _internal_model_file;
 
 public:
-	TrainingModel(const training_backend_type_e backend_type = TRAINING_BACKEND_NNTRAINER,
-				  const training_target_type_e target_type = TRAINING_TARGET_CPU,
-				  const std::string internal_model_file = "model_and_weights.ini");
+	TrainingModel(const training_backend_type_e backend_type, const training_target_type_e target_type,
+				  const std::vector<size_t> input_tensor_shape, const std::string internal_model_file);
 	virtual ~TrainingModel();
 
 	void ApplyDataSet(std::unique_ptr<DataSetManager> &data_set);
diff --git a/mv_machine_learning/training/src/training_model.cpp b/mv_machine_learning/training/src/training_model.cpp
index 1432576e..9f20fabd 100644
--- a/mv_machine_learning/training/src/training_model.cpp
+++ b/mv_machine_learning/training/src/training_model.cpp
@@ -34,7 +34,7 @@ using namespace TrainingEngineInterface::Common;
 using namespace mediavision::machine_learning::exception;
 
 TrainingModel::TrainingModel(const training_backend_type_e backend_type, const training_target_type_e target_type,
-							 const string internal_model_file)
+							 vector<size_t> input_tensor_shape, const string internal_model_file)
 {
 	_internal_model_file = internal_model_file;
 	_training = make_unique<TrainingEngineInterface::Common::TrainingEngineCommon>();