From 10965865f20620704308c026f3139abbb8f96742 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Thu, 30 Sep 2021 19:45:34 +0900 Subject: [PATCH 01/16] test/machine_learning/inference: add test cases for legacy path Added test cases for the legacy path of the inference engine, which uses user-given model information instead of information from a json file. For this, the patch also contains a bit of refactoring that replaces google test fixtures with parameterized tests so that the API path - json or legacy - can be chosen at runtime. This patch enlarges the existing test coverage from 119 to 132 test cases. [==========] 132 tests from 6 test suites ran. (49021 ms total) [ PASSED ] 132 tests. Change-Id: I9829725aad8037cbe5a82d50e7790a3e7a6bfe6b Signed-off-by: Inki Dae --- .../inference/test_face_detection.cpp | 40 +++- .../inference/test_face_landmark_detection.cpp | 11 +- .../inference/test_image_classification.cpp | 204 +++++++++++++++++++-- .../inference/test_inference_helper.cpp | 25 ++- .../inference/test_inference_helper.hpp | 18 +- .../inference/test_object_detection.cpp | 41 ++++- .../inference/test_pose_landmark_detection.cpp | 41 ++++- 7 files changed, 341 insertions(+), 39 deletions(-) diff --git a/test/testsuites/machine_learning/inference/test_face_detection.cpp b/test/testsuites/machine_learning/inference/test_face_detection.cpp index ebf37e5..376a717 100644 --- a/test/testsuites/machine_learning/inference/test_face_detection.cpp +++ b/test/testsuites/machine_learning/inference/test_face_detection.cpp @@ -27,6 +27,7 @@ public: { ASSERT_EQ(mv_inference_configure(infer, engine_cfg), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_inference_prepare(infer), MEDIA_VISION_ERROR_NONE); ASSERT_EQ(MediaVision::Common::ImageHelper::loadImageToSource( IMG_FACE, mv_source), @@ -37,9 +38,42 @@ public: } }; -TEST_F(TestFaceDetection, CPU_TFLITE_MobilenetV1_SSD) +TEST_P(TestFaceDetection, CPU_TFLITE_MobilenetV1_SSD) { engine_config_hosted_cpu_tflite(engine_cfg, - FD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH); + FD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH, _use_json_parser); + if (!_use_json_parser) { + const char *inputNodeName = "normalized_input_image_tensor"; + const char *outputNodeName[] = { "TFLite_Detection_PostProcess", + "TFLite_Detection_PostProcess:1", + "TFLite_Detection_PostProcess:2", + "TFLite_Detection_PostProcess:3" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3), + MEDIA_VISION_ERROR_NONE); + + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 4), MEDIA_VISION_ERROR_NONE); + } + inferenceFace(); -} \ No newline at end of file +} + +INSTANTIATE_TEST_CASE_P(Prefix, TestFaceDetection, + ::testing::Values( + ParamTypeOne(false), +
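/* Each case below runs the whole suite once: false exercises the legacy path, where tensor and node attributes are set explicitly in the test body, and true loads the same information from the model's .json meta file. */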
ParamTypeOne(true) + ) +); \ No newline at end of file diff --git a/test/testsuites/machine_learning/inference/test_face_landmark_detection.cpp b/test/testsuites/machine_learning/inference/test_face_landmark_detection.cpp index 6d4ada9..e186c6f 100644 --- a/test/testsuites/machine_learning/inference/test_face_landmark_detection.cpp +++ b/test/testsuites/machine_learning/inference/test_face_landmark_detection.cpp @@ -37,7 +37,7 @@ public: } }; -TEST_F(TestFaceLandmarkDetection, CPU_OPENCV_CAFFE_CNNCASCADE) +TEST_P(TestFaceLandmarkDetection, CPU_OPENCV_CAFFE_CNNCASCADE) { const char *inputNodeName = "data"; const char *outputNodeName[] = { "Sigmoid_fc2" }; @@ -85,4 +85,11 @@ TEST_F(TestFaceLandmarkDetection, CPU_OPENCV_CAFFE_CNNCASCADE) outputNodeName, 1), MEDIA_VISION_ERROR_NONE); inferenceFaceLandmark(); -} \ No newline at end of file +} + +INSTANTIATE_TEST_CASE_P(Prefix, TestFaceLandmarkDetection, + ::testing::Values( + ParamTypeOne(false), + ParamTypeOne(true) + ) +); \ No newline at end of file diff --git a/test/testsuites/machine_learning/inference/test_image_classification.cpp b/test/testsuites/machine_learning/inference/test_image_classification.cpp index 0aab594..cdda3f6 100644 --- a/test/testsuites/machine_learning/inference/test_image_classification.cpp +++ b/test/testsuites/machine_learning/inference/test_image_classification.cpp @@ -63,58 +63,232 @@ public: } }; -TEST_F(TestImageClassification, CPU_TFLITE_MobilenetV1) +TEST_P(TestImageClassification, CPU_TFLITE_MobilenetV1) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, IC_TFLITE_WEIGHT_MOBILENET_V1_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "MobilenetV1/Predictions/Reshape_1" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); } -TEST_F(TestImageClassification, CPU_TFLITE_MobilenetV2) +TEST_P(TestImageClassification, CPU_TFLITE_MobilenetV2) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, IC_TFLITE_WEIGHT_MOBILENET_V2_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "MobilenetV2/Predictions/Reshape_1" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, 
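/* with mean and std both set to 127.5, preprocessing computes (pixel - mean) / std, mapping UINT8 values from [0, 255] to [-1, 1] */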
MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.01), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); } -TEST_F(TestImageClassification, CPU_TFLITE_Densenet) +TEST_P(TestImageClassification, CPU_TFLITE_Densenet) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, IC_TFLITE_WEIGHT_DENSENET_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "Placeholder"; + const char *outputNodeName[] = { "softmax_tensor" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 255.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); } -TEST_F(TestImageClassification, CPU_TFLITE_Nasnet) +TEST_P(TestImageClassification, CPU_TFLITE_Nasnet) { engine_config_hosted_cpu_tflite_user_model(engine_cfg, IC_TFLITE_WEIGHT_NASNET_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, + _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "final_layer/predictions" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + 
MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); } -TEST_F(TestImageClassification, CPU_TFLITE_MNasnet) +TEST_P(TestImageClassification, CPU_TFLITE_MNasnet) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, IC_TFLITE_WEIGHT_MNASNET_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, + _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "output" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 57.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); } -TEST_F(TestImageClassification, CPU_TFLITE_Squeezenet) +TEST_P(TestImageClassification, CPU_TFLITE_Squeezenet) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, IC_TFLITE_WEIGHT_SQUEEZENET_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, + _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "Placeholder"; + const char *outputNodeName[] = { "softmax_tensor" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); } -TEST_F(TestImageClassification, CPU_TFLITE_QUANT_MobilenetV1) +TEST_P(TestImageClassification, CPU_TFLITE_QUANT_MobilenetV1) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, 
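/* quantized variant: the attributes below keep pixels as UINT8 (mean 0.0, std 1.0 and an explicit UINT8 input data type), so no normalization is applied */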
IC_TFLITE_WEIGHT_QUANT_MOBILENET_V1_224_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); + IC_LABEL_MOBILENET_V1_224_PATH, + _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "MobilenetV1/Predictions/Reshape_1" }; + + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_UINT8), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 1.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + inferenceBanana(); -} \ No newline at end of file +} + +INSTANTIATE_TEST_CASE_P(Prefix, TestImageClassification, + ::testing::Values( + ParamTypeOne(false), + ParamTypeOne(true) + ) +); \ No newline at end of file diff --git a/test/testsuites/machine_learning/inference/test_inference_helper.cpp b/test/testsuites/machine_learning/inference/test_inference_helper.cpp index 1c88689..81a0380 100644 --- a/test/testsuites/machine_learning/inference/test_inference_helper.cpp +++ b/test/testsuites/machine_learning/inference/test_inference_helper.cpp @@ -16,19 +16,23 @@ TestInference::~TestInference() } void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, - const char *tf_weight) + const char *tf_weight, + const bool use_json_parser) { EXPECT_EQ(mv_engine_config_set_string_attribute( handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, tf_weight), MEDIA_VISION_ERROR_NONE); - std::string meta_file_path = tf_weight; - meta_file_path = meta_file_path.substr(0, meta_file_path.find('.')); - meta_file_path += std::string(".json"); + if (use_json_parser) { + std::string meta_file_path = tf_weight; + meta_file_path = meta_file_path.substr(0, meta_file_path.find('.')); + meta_file_path += std::string(".json"); + + EXPECT_EQ(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_META_FILE_PATH , meta_file_path.c_str()), + MEDIA_VISION_ERROR_NONE); + } - EXPECT_EQ(mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_META_FILE_PATH , meta_file_path.c_str()), - MEDIA_VISION_ERROR_NONE); EXPECT_EQ(mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_TFLITE), @@ -41,10 +45,11 @@ void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, void engine_config_hosted_cpu_tflite_user_model(mv_engine_config_h handle, const char *tf_weight, - const char *user_file) + const char *user_file, + const bool use_json_parser) { - engine_config_hosted_cpu_tflite(handle, tf_weight); + engine_config_hosted_cpu_tflite(handle, tf_weight, use_json_parser); EXPECT_EQ(mv_engine_config_set_string_attribute( handle, 
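/* user_file is the label file that maps model output indices to human-readable class names */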
MV_INFERENCE_MODEL_USER_FILE_PATH, user_file), MEDIA_VISION_ERROR_NONE); -} +} \ No newline at end of file diff --git a/test/testsuites/machine_learning/inference/test_inference_helper.hpp b/test/testsuites/machine_learning/inference/test_inference_helper.hpp index 16bb4c6..a04fb00 100644 --- a/test/testsuites/machine_learning/inference/test_inference_helper.hpp +++ b/test/testsuites/machine_learning/inference/test_inference_helper.hpp @@ -3,8 +3,18 @@ #include -class TestInference : public ::testing::Test +typedef std::tuple<bool> ParamTypeOne; + +class TestInference : public ::testing::TestWithParam<ParamTypeOne> { +protected: + void SetUp() final + { + std::tie(_use_json_parser) = GetParam(); + } + + bool _use_json_parser; + public: TestInference(); virtual ~TestInference(); @@ -14,10 +24,12 @@ public: }; void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, - const char *tf_weight); + const char *tf_weight, + const bool use_json_parser); void engine_config_hosted_cpu_tflite_user_model(mv_engine_config_h handle, const char *tf_weight, - const char *user_file); + const char *user_file, + const bool use_json_parser); #endif //__TEST_INFERENCE_HELPER_HPP__ diff --git a/test/testsuites/machine_learning/inference/test_object_detection.cpp b/test/testsuites/machine_learning/inference/test_object_detection.cpp index 8cea9f7..b849c20 100644 --- a/test/testsuites/machine_learning/inference/test_object_detection.cpp +++ b/test/testsuites/machine_learning/inference/test_object_detection.cpp @@ -46,10 +46,45 @@ public: } }; -TEST_F(TestObjectDetection, CPU_TFLITE_MobilenetV1_SSD) +TEST_P(TestObjectDetection, CPU_TFLITE_MobilenetV1_SSD) { engine_config_hosted_cpu_tflite_user_model( engine_cfg, OD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH, - OD_LABEL_MOBILENET_V1_SSD_300_PATH); + OD_LABEL_MOBILENET_V1_SSD_300_PATH, + _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "normalized_input_image_tensor"; + const char *outputNodeName[] = { "TFLite_Detection_PostProcess", + "TFLite_Detection_PostProcess:1", + "TFLite_Detection_PostProcess:2", + "TFLite_Detection_PostProcess:3" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3), + MEDIA_VISION_ERROR_NONE); + + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 4), MEDIA_VISION_ERROR_NONE); + } + inferenceDog(); -} \ No newline at end of file +} + +INSTANTIATE_TEST_CASE_P(Prefix, TestObjectDetection, + ::testing::Values( + ParamTypeOne(false), + ParamTypeOne(true) + ) +); \ No newline at end of file diff --git a/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp b/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp 
index 033488c..58c4b43 100644 --- a/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp +++ b/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp @@ -37,9 +37,44 @@ public: } }; -TEST_F(TestPoseLandmarkDetection, CPU_TFLITE_MobilenetV1) +TEST_P(TestPoseLandmarkDetection, CPU_TFLITE_MobilenetV1) { engine_config_hosted_cpu_tflite( - engine_cfg, PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH); + engine_cfg, PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH, _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "sub_2"; + const char *outputNodeName[] = { "MobilenetV1/heatmap_2/BiasAdd", + "MobilenetV1/offset_2/BiasAdd", + "MobilenetV1/displacement_fwd_2/BiasAdd", + "MobilenetV1/displacement_bwd_2/BiasAdd" }; + + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3), + MEDIA_VISION_ERROR_NONE); + + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 4), MEDIA_VISION_ERROR_NONE); + } + + inferencePoseLandmark(); -} \ No newline at end of file +} + +INSTANTIATE_TEST_CASE_P(Prefix, TestPoseLandmarkDetection, + ::testing::Values( + ParamTypeOne(false), + ParamTypeOne(true) + ) +); \ No newline at end of file -- 2.7.4 From 78b72ee27b5b6ae5355046e82fcc3d604ab0a55f Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Thu, 7 Oct 2021 15:30:36 +0900 Subject: [PATCH 02/16] mv_machine_learning: code cleanup to GetFacialLandMarkDetectionResults function Cleaned up the GetFacialLandMarkDetectionResults function by: - sliding code - renaming variables to meaningful names for readability. 
Change-Id: Ib59786c085c8202a1f7d9eb85a01d528220c728f Signed-off-by: Inki Dae --- .../mv_inference/inference/include/Inference.h | 3 +- .../mv_inference/inference/src/Inference.cpp | 46 ++++++++++++---------- .../inference/src/mv_inference_open.cpp | 25 +++--------- 3 files changed, 31 insertions(+), 43 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/include/Inference.h b/mv_machine_learning/mv_inference/inference/include/Inference.h index cad69e9..a0506f2 100644 --- a/mv_machine_learning/mv_inference/inference/include/Inference.h +++ b/mv_machine_learning/mv_inference/inference/include/Inference.h @@ -317,8 +317,7 @@ namespace inference * @since_tizen 5.5 * @return @c true on success, otherwise a negative error value */ - int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results, - int width, int height); + int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results); /** * @brief Gets the PoseLandmarkDetectionResults diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 756f041..c25a4a4 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1536,15 +1536,15 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int Inference::GetFacialLandMarkDetectionResults( - FacialLandMarkDetectionResults *detectionResults, int width, int height) + int Inference::GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results) { LOGI("ENTER"); - FacialLandMarkDetectionResults results; + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); if (outputMeta.IsParsed()) { auto& landmarkInfo = outputMeta.GetLandmark(); auto& scoreInfo = outputMeta.GetScore(); + if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) || !mOutputTensorBuffers.exist(scoreInfo.GetName())) { LOGE("output buffers named of %s or %s are NULL", @@ -1555,27 +1555,26 @@ namespace inference int heatMapWidth = 0; int heatMapHeight = 0; int heatMapChannel = 0; - if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { - heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; - heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; - heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; - } - - int number_of_landmarks = 0; std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll(); + int number_of_landmarks = heatMapChannel; + if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]); number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] / landmarkInfo.GetOffset(); } else { - number_of_landmarks = heatMapChannel; + heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; + heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; + heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; } + LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel); // decoding PoseDecoder 
poseDecoder(mOutputTensorBuffers, outputMeta, heatMapWidth, heatMapHeight, heatMapChannel, number_of_landmarks); + + // initialize decoder queue with landmarks to be decoded. int ret = poseDecoder.init(); if (ret != MEDIA_VISION_ERROR_NONE) { @@ -1585,21 +1584,24 @@ namespace inference float inputW = 1.f; float inputH = 1.f; + if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) { inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); } + float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 : outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; + poseDecoder.decode(inputW, inputH, thresRadius); for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { - results.locations.push_back( + results->locations.push_back( cv::Point(poseDecoder.getPointX(0, landmarkIndex) * static_cast<float>(mSourceSize.width), poseDecoder.getPointY(0, landmarkIndex) * static_cast<float>(mSourceSize.height))); } - results.number_of_landmarks = results.locations.size(); - *detectionResults = results; + + results->number_of_landmarks = results->locations.size(); } else { tensor_t outputData; @@ -1611,22 +1613,24 @@ namespace inference } int number_of_detections = outputData.dimInfo[0][1] >> 1; - float *loc = reinterpret_cast<float *>(outputData.data[0]); - results.number_of_landmarks = number_of_detections; - results.locations.resize(number_of_detections); + + results->number_of_landmarks = number_of_detections; + results->locations.resize(number_of_detections); LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height); - for (auto& point : results.locations) { + + float *loc = reinterpret_cast<float *>(outputData.data[0]); + + for (auto& point : results->locations) { point.x = static_cast<int>(*loc++ * mSourceSize.width); point.y = static_cast<int>(*loc++ * mSourceSize.height); LOGI("x:%d, y:%d", point.x, point.y); } - - *detectionResults = results; } + LOGI("Inference: FacialLandmarkDetectionResults: %d\n", - results.number_of_landmarks); + results->number_of_landmarks); return MEDIA_VISION_ERROR_NONE; } diff --git a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp index 41c62df..c945ac4 100644 --- a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp +++ b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp @@ -765,20 +765,6 @@ int mv_inference_facial_landmark_detect_open( mv_inference_facial_landmark_detected_cb detected_cb, void *user_data) { Inference *pInfer = static_cast<Inference *>(infer); - unsigned int width, height; - - int ret = mv_source_get_width(source, &width); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get width"); - return ret; - } - - ret = mv_source_get_height(source, &height); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get height"); - return ret; - } - std::vector<mv_source_h> sources; std::vector<mv_rectangle_s> rects; @@ -787,7 +773,7 @@ int mv_inference_facial_landmark_detect_open( if (roi != NULL) rects.push_back(*roi); - ret = pInfer->Run(sources, rects); + int ret = pInfer->Run(sources, rects); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to run inference"); return ret; @@ -795,8 +781,7 @@ int mv_inference_facial_landmark_detect_open( FacialLandMarkDetectionResults facialLandMarkDetectionResults; - ret = pInfer->GetFacialLandMarkDetectionResults( - &facialLandMarkDetectionResults, width, height); + ret = 
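/* the width and height arguments are gone: landmark coordinates are scaled by mSourceSize inside GetFacialLandMarkDetectionResults itself */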
pInfer->GetFacialLandMarkDetectionResults(&facialLandMarkDetectionResults); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to get inference results"); return ret; } @@ -805,9 +790,9 @@ int mv_inference_facial_landmark_detect_open( int numberOfLandmarks = facialLandMarkDetectionResults.number_of_landmarks; std::vector<mv_point_s> locations(numberOfLandmarks); - for (int n = 0; n < numberOfLandmarks; ++n) { - locations[n].x = facialLandMarkDetectionResults.locations[n].x; - locations[n].y = facialLandMarkDetectionResults.locations[n].y; + for (int landmark_idx = 0; landmark_idx < numberOfLandmarks; ++landmark_idx) { + locations[landmark_idx].x = facialLandMarkDetectionResults.locations[landmark_idx].x; + locations[landmark_idx].y = facialLandMarkDetectionResults.locations[landmark_idx].y; } detected_cb(source, numberOfLandmarks, locations.data(), user_data); -- 2.7.4 From c750e58c3fc375e2f9070ed03bc24617bc5d1f3e Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Thu, 7 Oct 2021 19:03:23 +0900 Subject: [PATCH 03/16] mv_machine_learning: code refactoring to input and output metadata classes Refactored the InputMetadata and OutputMetadata classes. The biggest change of this patch is to delegate all requests for the various metadata-relevant classes to the InputMetadata and OutputMetadata classes, which mitigates the Inference class's dependency on Metadata internals. In addition, it moves several classes declared inside the BoxInfo class out of it, and changes class to struct where class functionality isn't needed. It also includes some cleanup and code sliding. This is just one step toward the next code refactoring. Change-Id: I92c4e2b2c77499246c56a237282c05262550301a Signed-off-by: Inki Dae --- .../mv_inference/inference/include/InputMetadata.h | 10 +- .../mv_inference/inference/include/ObjectDecoder.h | 9 +- .../inference/include/OutputMetadata.h | 323 ++++++++++----------- .../mv_inference/inference/src/Inference.cpp | 203 +++++++------ .../mv_inference/inference/src/ObjectDecoder.cpp | 70 ++--- .../mv_inference/inference/src/OutputMetadata.cpp | 139 +++------ .../mv_inference/inference/src/PoseDecoder.cpp | 97 +++---- packaging/capi-media-vision.spec | 2 +- 8 files changed, 392 insertions(+), 461 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/include/InputMetadata.h b/mv_machine_learning/mv_inference/inference/include/InputMetadata.h index 8b5e2bd..bdd12c2 100644 --- a/mv_machine_learning/mv_inference/inference/include/InputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/InputMetadata.h @@ -88,10 +88,6 @@ namespace inference class InputMetadata { public: - bool parsed; - std::map<std::string, LayerInfo> layer; - std::map<std::string, Options> option; - /** * @brief Creates an InputMetadata class instance. 
* @@ -113,11 +109,17 @@ namespace inference * @since_tizen 6.5 */ int Parse(JsonObject *root); + bool IsParsed(void) { return parsed; } + std::map& GetLayer() { return layer; } + std::map& GetOption() { return option; } private: + bool parsed; std::map mSupportedShapeType; std::map mSupportedDataType; std::map mSupportedColorSpace; + std::map layer; + std::map option; template static T GetSupportedType(JsonObject* root, std::string typeName, diff --git a/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h b/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h index f5324f2..11c5cc2 100755 --- a/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h +++ b/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h @@ -44,13 +44,8 @@ namespace inference OutputMetadata mMeta; int mBoxOffset; int mNumberOfOjects; - - ScoreInfo& mScoreInfo; - BoxInfo& mBoxInfo; - float mScaleW; float mScaleH; - Boxes mResultBoxes; float decodeScore(int idx); @@ -62,9 +57,7 @@ namespace inference int boxOffset, float scaleW, float scaleH, int numberOfObjects = 0) : mTensorBuffer(buffer), mMeta(metaData), mBoxOffset(boxOffset), mNumberOfOjects(numberOfObjects), - mScoreInfo(mMeta.GetScore()), mBoxInfo(mMeta.GetBox()), - mScaleW(scaleW), mScaleH(scaleH), - mResultBoxes() { + mScaleW(scaleW), mScaleH(scaleH), mResultBoxes() { }; ~ObjectDecoder() = default; diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index ca4df80..d223726 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -42,6 +42,7 @@ namespace inference { private: std::vector dims; + public: std::vector GetValidIndexAll() const; void SetValidIndex(int index); @@ -52,6 +53,7 @@ namespace inference private: double scale; double zeropoint; + public: DeQuantization(double s, double z) : scale(s), zeropoint(z) {}; ~DeQuantization() = default; @@ -66,10 +68,9 @@ namespace inference std::string name; DimInfo dimInfo; double threshold; - inference_score_type_e type; int topNumber; + inference_score_type_e type; std::shared_ptr deQuantization; - std::map supportedScoreTypes; public: @@ -82,141 +83,130 @@ namespace inference inference_score_type_e GetType() { return type; } int GetTopNumber() { return topNumber; } std::shared_ptr GetDeQuant() { return deQuantization; } - int ParseScore(JsonObject *root); }; - class BoxInfo - { + struct AnchorParam { + int mode; /**< 0: generate anchor, 1:load pre-anchor*/ + int numLayers; + float minScale; + float maxScale; + int inputSizeHeight; + int inputSizeWidth; + float anchorOffsetX; + float anchorOffsetY; + std::vector strides; + std::vector aspectRatios; + bool isReduceBoxedInLowestLayer; + float interpolatedScaleAspectRatio; + bool isFixedAnchorSize; + bool isExponentialBoxScale; + float xScale; + float yScale; + float wScale; + float hScale; + }; + + struct NMSParam { + inference_box_nms_type_e mode; /**< 0: standard */ + float iouThreshold; + std::map supportedBoxNmsTypes; + }; + + struct RotateParam { + int startPointIndex; + int endPointIndex; + cv::Point2f startPoint; + cv::Point2f endPoint; + float baseAngle; + }; + + struct RoiOptionParam { + int startPointIndex; + int endPointIndex; + int centerPointIndex; + cv::Point2f centerPoint; + float shiftX; + float shiftY; + float scaleX; + float scaleY; + int mode; + }; + + class DecodeInfo { + private: + AnchorParam 
anchorParam; + std::vector anchorBoxes; + NMSParam nmsParam; + RotateParam rotParam; + RoiOptionParam roiOptParam; + public: - class DecodeInfo { - public: - class AnchorParam { - public: - int mode; /**< 0: generate anchor, 1:load pre-anchor*/ - int numLayers; - float minScale; - float maxScale; - int inputSizeHeight; - int inputSizeWidth; - float anchorOffsetX; - float anchorOffsetY; - std::vector strides; - std::vector aspectRatios; - bool isReduceBoxedInLowestLayer; - float interpolatedScaleAspectRatio; - bool isFixedAnchorSize; - bool isExponentialBoxScale; - float xScale; - float yScale; - float wScale; - float hScale; - - AnchorParam() = default; - ~AnchorParam() = default; - }; - - class NMSParam { - public: - inference_box_nms_type_e mode; /**< 0: standard */ - float iouThreshold; - - std::map supportedBoxNmsTypes; - - NMSParam() : mode(INFERENCE_BOX_NMS_TYPE_NONE), iouThreshold(0.2f) { - supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD}); - }; - - ~NMSParam() = default; - }; - - class RotateParam { - public: - int startPointIndex; - int endPointIndex; - cv::Point2f startPoint; - cv::Point2f endPoint; - float baseAngle; - - RotateParam() : startPointIndex(-1), - endPointIndex(-1), - startPoint(cv::Point2f(0.f,0.f)), - endPoint(cv::Point2f(0.f,0.f)), - baseAngle(0.f){}; - ~RotateParam() = default; - }; - - class RoiOptionParam { - public: - int startPointIndex; - int endPointIndex; - int centerPointIndex; - cv::Point2f centerPoint; - float shiftX; - float shiftY; - float scaleX; - float scaleY; - int mode; - - RoiOptionParam() : startPointIndex(-1), - endPointIndex(-1), - centerPointIndex(-1), - centerPoint(cv::Point2f(0.f, 0.f)), - shiftX(0.f), shiftY(0.f), - scaleX(1.f), scaleY(1.f), - mode(-1) {}; - ~RoiOptionParam() = default; - }; - - private: - AnchorParam anchorParam; - std::vector anchorBoxes; - NMSParam nmsParam; - RotateParam rotParam; - RoiOptionParam roiOptParam; - - public: - DecodeInfo() = default; - ~DecodeInfo() = default; - std::vector& GetAnchorBoxAll(); - bool IsAnchorBoxEmpty(); - void AddAnchorBox(cv::Rect2f& ahcnor); - void ClearAnchorBox(); - - // Anchor param - int ParseAnchorParam(JsonObject *root); - int GenerateAnchor(); - bool IsFixedAnchorSize(); - bool IsExponentialBoxScale(); - float GetAnchorXscale(); - float GetAnchorYscale(); - float GetAnchorWscale(); - float GetAnchorHscale(); - float CalculateScale(float min, float max, int index, int maxStride); - - // Nms param - int ParseNms(JsonObject *root); - int GetNmsMode(); - float GetNmsIouThreshold(); - - // Rotate param - int ParseRotate(JsonObject *root); - int GetRotStartPointIndex(); - int GetRotEndPointIndex(); - float GetBaseAngle(); - - // Roi option param - int ParseRoiOption(JsonObject *root); - int GetRoiMode(); - int GetRoiCenterPointIndex(); - int GetRoiStartPointIndex(); - int GetRoiEndPointIndex(); - float GetShiftX(); - float GetShiftY(); - float GetScaleX(); - float GetScaleY(); - }; + DecodeInfo() { + nmsParam.mode = INFERENCE_BOX_NMS_TYPE_NONE; + nmsParam.iouThreshold = 0.2f; + nmsParam.supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD}); + + rotParam.startPointIndex = -1; + rotParam.endPointIndex = -1; + rotParam.startPoint = cv::Point2f(0.f,0.f); + rotParam.endPoint = cv::Point2f(0.f,0.f); + rotParam.baseAngle = 0.f; + + roiOptParam.startPointIndex = -1; + roiOptParam.endPointIndex = -1; + roiOptParam.centerPointIndex = -1; + roiOptParam.centerPoint = cv::Point2f(0.f, 0.f); + roiOptParam.shiftX = 0.f; + roiOptParam.shiftY = 0.f; + 
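/* defaults mirror the old nested-class constructors: identity scale and mode -1, meaning the ROI option is unset */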
roiOptParam.scaleX = 1.f; + roiOptParam.scaleY = 1.f; + roiOptParam.mode = -1; + } + + ~DecodeInfo() = default; + + std::vector& GetAnchorBoxAll(); + bool IsAnchorBoxEmpty(); + void AddAnchorBox(cv::Rect2f& ahcnor); + void ClearAnchorBox(); + + // Anchor param + int ParseAnchorParam(JsonObject *root); + int GenerateAnchor(); + bool IsFixedAnchorSize(); + bool IsExponentialBoxScale(); + float GetAnchorXscale(); + float GetAnchorYscale(); + float GetAnchorWscale(); + float GetAnchorHscale(); + float CalculateScale(float min, float max, int index, int maxStride); + + // Nms param + int ParseNms(JsonObject *root); + int GetNmsMode(); + float GetNmsIouThreshold(); + + // Rotate param + int ParseRotate(JsonObject *root); + int GetRotStartPointIndex(); + int GetRotEndPointIndex(); + float GetBaseAngle(); + + // Roi option param + int ParseRoiOption(JsonObject *root); + int GetRoiMode(); + int GetRoiCenterPointIndex(); + int GetRoiStartPointIndex(); + int GetRoiEndPointIndex(); + float GetShiftX(); + float GetShiftY(); + float GetScaleX(); + float GetScaleY(); + }; + class BoxInfo + { private: std::string name; DimInfo dimInfo; @@ -275,25 +265,16 @@ namespace inference int ParseNumber(JsonObject *root); }; + struct HeatMapInfo { + int wIdx; + int hIdx; + int cIdx; + float nmsRadius; + inference_tensor_shape_type_e shapeType; + }; + class Landmark { - public: - class DecodeInfo { - public: - class HeatMapInfo { - public: - int wIdx; - int hIdx; - int cIdx; - inference_tensor_shape_type_e shapeType; - float nmsRadius; - HeatMapInfo() = default; - ~HeatMapInfo() = default; - }; - HeatMapInfo heatMap; - DecodeInfo() = default; - ~DecodeInfo() = default; - }; private: std::string name; DimInfo dimInfo; @@ -303,7 +284,7 @@ namespace inference inference_landmark_decoding_type_e decodingType; /**< 0: decoding unnecessary, 1: decoding heatmap, 2: decoding heatmap with refinement */ - DecodeInfo decodingInfo; + HeatMapInfo heatMapInfo; std::map supportedLandmarkTypes; std::map supportedLandmarkCoordinateTypes; @@ -318,7 +299,7 @@ namespace inference int GetOffset(); inference_landmark_coorindate_type_e GetCoordinate(); inference_landmark_decoding_type_e GetDecodingType(); - DecodeInfo& GetDecodingInfo(); + HeatMapInfo& GetHeatMapInfo(); int ParseLandmark(JsonObject *root); }; @@ -366,7 +347,7 @@ namespace inference Edge() = default; ~Edge() = default; int ParseEdge(JsonObject *root); - std::vector>& GetEdgesAll(); + std::vector>& GetEdgesAll() { return edges; } }; class OutputMetadata @@ -416,20 +397,38 @@ namespace inference */ int Parse(JsonObject *root); - bool IsParsed(); - ScoreInfo& GetScore(); - BoxInfo& GetBox(); - Label& GetLabel(); - Number& GetNumber(); - Landmark& GetLandmark(); - OffsetVec& GetOffset(); - std::vector& GetDispVecAll(); - Edge& GetEdge(); + bool IsParsed() { return parsed; } + + std::string GetScoreName() { return score.GetName(); } + DimInfo GetScoreDimInfo() { return score.GetDimInfo(); } + inference_score_type_e GetScoreType() { return score.GetType(); } + double GetScoreThreshold() { return score.GetThresHold(); } + int GetScoreTopNumber() { return score.GetTopNumber(); } + std::shared_ptr GetScoreDeQuant() { return score.GetDeQuant(); } + std::string GetBoxName() { return box.GetName(); } + DimInfo GetBoxDimInfo() { return box.GetDimInfo(); } + std::vector GetBoxOrder() { return box.GetOrder(); } + DecodeInfo& GetBoxDecodeInfo() { return box.GetDecodeInfo(); } + inference_box_type_e GetBoxType() { return box.GetType(); } + int GetScoreCoordinate() { return 
box.GetCoordinate(); } + std::string GetLabelName() { return label.GetName(); } + std::string GetNumberName() { return number.GetName(); } + DimInfo GetNumberDimInfo() { return number.GetDimInfo(); } + std::string GetLandmarkName() { return landmark.GetName(); } + int GetLandmarkOffset() { return landmark.GetOffset(); } + inference_landmark_type_e GetLandmarkType() { return landmark.GetType(); } + DimInfo GetLandmarkDimInfo() { return landmark.GetDimInfo(); } + HeatMapInfo& GetLandmarkHeatMapInfo() { return landmark.GetHeatMapInfo(); } + inference_landmark_coorindate_type_e GetLandmarkCoordinate() { return landmark.GetCoordinate(); } + inference_landmark_decoding_type_e GetLandmarkDecodingType() { return landmark.GetDecodingType(); } + std::string GetOffsetVecName() { return offsetVec.GetName(); } + inference_box_decoding_type_e GetBoxDecodingType() { return box.GetDecodingType(); } + std::vector& GetDispVecAll() { return dispVecs; } + std::vector>& GetEdges() { return edgeMap.GetEdgesAll(); } template static T GetSupportedType(JsonObject* root, std::string typeName, std::map& supportedTypes); }; - } /* Inference */ } /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index c25a4a4..05c9c07 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -371,10 +371,9 @@ namespace inference mConfig.mDataType = static_cast(dataType); mConfig.mInputLayerNames = names; - const InputMetadata& inputMeta = mMetadata.GetInputMeta(); - if (inputMeta.parsed) { + if (mMetadata.GetInputMeta().IsParsed()) { LOGI("use input meta"); - auto& layerInfo = inputMeta.layer.begin()->second; + auto& layerInfo = mMetadata.GetInputMeta().GetLayer().begin()->second; if (layerInfo.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { // NCHW mConfig.mTensorInfo.ch = layerInfo.dims[1]; mConfig.mTensorInfo.dim = layerInfo.dims[0]; @@ -389,8 +388,8 @@ namespace inference LOGE("Invalid shape type[%d]", layerInfo.shapeType); } - if (!inputMeta.option.empty()) { - auto& option = inputMeta.option.begin()->second; + if (!mMetadata.GetInputMeta().GetOption().empty()) { + auto& option = mMetadata.GetInputMeta().GetOption().begin()->second; if (option.normalization.use) { mConfig.mMeanValue = option.normalization.mean[0]; mConfig.mStdValue = option.normalization.std[0]; @@ -399,7 +398,7 @@ namespace inference mConfig.mDataType = layerInfo.dataType; mConfig.mInputLayerNames.clear(); - for (auto& layer : inputMeta.layer) { + for (auto& layer : mMetadata.GetInputMeta().GetLayer()) { mConfig.mInputLayerNames.push_back(layer.first); } } @@ -444,25 +443,25 @@ namespace inference mConfig.mOutputLayerNames = names; OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (outputMeta.IsParsed()) { + if (mMetadata.GetOutputMeta().IsParsed()) { mConfig.mOutputLayerNames.clear(); - if (!outputMeta.GetScore().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetScore().GetName()); + if (!outputMeta.GetScoreName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetScoreName()); - if (!outputMeta.GetBox().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetBox().GetName()); + if (!outputMeta.GetBoxName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxName()); - if (!outputMeta.GetLabel().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetLabel().GetName()); + if 
(!outputMeta.GetLabelName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetLabelName()); - if (!outputMeta.GetNumber().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetNumber().GetName()); + if (!outputMeta.GetNumberName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetNumberName()); - if (!outputMeta.GetLandmark().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmark().GetName()); + if (!outputMeta.GetLandmarkName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmarkName()); - if (!outputMeta.GetOffset().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetOffset().GetName()); + if (!outputMeta.GetOffsetVecName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetOffsetVecName()); for (auto& dispVec : outputMeta.GetDispVecAll()) { mConfig.mOutputLayerNames.push_back(dispVec.GetName()); @@ -1044,12 +1043,11 @@ namespace inference return MEDIA_VISION_ERROR_INVALID_PARAMETER; } - const InputMetadata& inputMeta = mMetadata.GetInputMeta(); - if (inputMeta.parsed) { + if (mMetadata.GetInputMeta().IsParsed()) { for (auto& buffer : mInputTensorBuffers.getAllTensorBuffer()) { inference_engine_tensor_buffer& tensor_buffer = buffer.second; - const LayerInfo& layerInfo = inputMeta.layer.at(buffer.first); - const Options& opt = inputMeta.option.empty() ? Options() : inputMeta.option.at(buffer.first); + const LayerInfo& layerInfo = mMetadata.GetInputMeta().GetLayer().at(buffer.first); + const Options& opt = mMetadata.GetInputMeta().GetOption().empty() ? Options() : mMetadata.GetInputMeta().GetOption().at(buffer.first); int data_type = ConvertToCv(tensor_buffer.data_type); @@ -1095,51 +1093,50 @@ namespace inference int Inference::GetClassficationResults(ImageClassificationResults &results) { - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); // Will contain top N results in ascending order. std::vector> topScore; auto threadHold = mConfig.mConfidenceThresHold; results.number_of_classes = 0; - if (outputMeta.IsParsed()) { - auto& info = outputMeta.GetScore(); - std::vector indexes = info.GetDimInfo().GetValidIndexAll(); + if (mMetadata.GetOutputMeta().IsParsed()) { + OutputMetadata outputMetadata = mMetadata.GetOutputMeta(); + std::vector indexes = outputMetadata.GetScoreDimInfo().GetValidIndexAll(); if (indexes.size() != 1) { LOGE("Invalid dim size. 
It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - int classes = mOutputLayerProperty.layers[info.GetName()].shape[indexes[0]]; + int classes = mOutputLayerProperty.layers[outputMetadata.GetScoreName()].shape[indexes[0]]; - if (!mOutputTensorBuffers.exist(info.GetName())) { + if (!mOutputTensorBuffers.exist(outputMetadata.GetScoreName())) { LOGE("output buffe is NULL"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } PostProcess postProc; - postProc.ScoreClear(info.GetTopNumber()); - threadHold = info.GetThresHold(); + postProc.ScoreClear(outputMetadata.GetScoreTopNumber()); + threadHold = outputMetadata.GetScoreThreshold(); for (int cId = 0; cId < classes; ++cId) { float value = 0.0f; try { - value = mOutputTensorBuffers.getValue(info.GetName(), cId); + value = mOutputTensorBuffers.getValue(outputMetadata.GetScoreName(), cId); } catch (const std::exception& e) { LOGE(" Fail to get getValue with %s", e.what()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - if (info.GetDeQuant()) { + if (outputMetadata.GetScoreDeQuant()) { value = PostProcess::dequant(value, - info.GetDeQuant()->GetScale(), - info.GetDeQuant()->GetZeroPoint()); + outputMetadata.GetScoreDeQuant()->GetScale(), + outputMetadata.GetScoreDeQuant()->GetZeroPoint()); } - if (info.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) + if (outputMetadata.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) value = PostProcess::sigmoid(value); if (value < threadHold) @@ -1212,46 +1209,45 @@ namespace inference int Inference::GetObjectDetectionResults( ObjectDetectionResults *detectionResults) { - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (outputMeta.IsParsed()) { + if (mMetadata.GetOutputMeta().IsParsed()) { + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + // decoding type - auto& boxInfo = outputMeta.GetBox(); - auto& scoreInfo = outputMeta.GetScore(); - if (!mOutputTensorBuffers.exist(boxInfo.GetName()) || - !mOutputTensorBuffers.exist(scoreInfo.GetName()) ){ + if (!mOutputTensorBuffers.exist(outputMeta.GetBoxName()) || + !mOutputTensorBuffers.exist(outputMeta.GetScoreName()) ){ LOGE("output buffers named of %s or %s are NULL", - boxInfo.GetName().c_str(), scoreInfo.GetName().c_str()); + outputMeta.GetBoxName().c_str(), outputMeta.GetScoreName().c_str()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } int boxOffset = 0; int numberOfObjects = 0; - if (boxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { - std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); + if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { + std::vector boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); if (boxIndexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; + boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; } else { - std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); + std::vector boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); if (boxIndexes.size() != 1) { LOGE("Invalid dim size. 
It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; + boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; - std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); + std::vector scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll(); if (scoreIndexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - numberOfObjects = mOutputLayerProperty.layers[scoreInfo.GetName()].shape[scoreIndexes[0]]; + numberOfObjects = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]]; } ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset, - static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()), - static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()), + static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()), + static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()), numberOfObjects); objDecoder.init(); @@ -1378,46 +1374,45 @@ namespace inference int Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults) { - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (outputMeta.IsParsed()) { + if (mMetadata.GetOutputMeta().IsParsed()) { + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + // decoding type - auto& boxInfo = outputMeta.GetBox(); - auto& scoreInfo = outputMeta.GetScore(); - if (!mOutputTensorBuffers.exist(boxInfo.GetName()) || - !mOutputTensorBuffers.exist(scoreInfo.GetName())){ + if (!mOutputTensorBuffers.exist(outputMeta.GetBoxName()) || + !mOutputTensorBuffers.exist(outputMeta.GetScoreName())){ LOGE("output buffers named of %s or %s are NULL", - boxInfo.GetName().c_str(), scoreInfo.GetName().c_str()); + outputMeta.GetBoxName().c_str(), outputMeta.GetScoreName().c_str()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } int boxOffset = 0; int numberOfFaces = 0; - if (boxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { - std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); + if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { + std::vector boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); if (boxIndexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; + boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; } else { - std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); + std::vector boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); if (boxIndexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; + boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]]; - std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); + std::vector scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll(); if (scoreIndexes.size() != 1) { LOGE("Invaid dim size. 
It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - numberOfFaces = mOutputLayerProperty.layers[scoreInfo.GetName()].shape[scoreIndexes[0]]; + numberOfFaces = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]]; } ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset, - static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()), - static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()), + static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()), + static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()), numberOfFaces); objDecoder.init(); @@ -1540,32 +1535,30 @@ namespace inference { LOGI("ENTER"); - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (outputMeta.IsParsed()) { - auto& landmarkInfo = outputMeta.GetLandmark(); - auto& scoreInfo = outputMeta.GetScore(); + if (mMetadata.GetOutputMeta().IsParsed()) { + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) || - !mOutputTensorBuffers.exist(scoreInfo.GetName())) { + if (!mOutputTensorBuffers.exist(outputMeta.GetLandmarkName()) || + !mOutputTensorBuffers.exist(outputMeta.GetScoreName())) { LOGE("output buffers named of %s or %s are NULL", - landmarkInfo.GetName().c_str(), scoreInfo.GetName().c_str()); + outputMeta.GetLandmarkName().c_str(), outputMeta.GetScoreName().c_str()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } int heatMapWidth = 0; int heatMapHeight = 0; int heatMapChannel = 0; - std::vector channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll(); + std::vector channelIndexes = outputMeta.GetLandmarkDimInfo().GetValidIndexAll(); int number_of_landmarks = heatMapChannel; - if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]); - number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] - / landmarkInfo.GetOffset(); + number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]] + / outputMeta.GetLandmarkOffset(); } else { - heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; - heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; - heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; + heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx]; + heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx]; + heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().cIdx]; } LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel); @@ -1585,13 +1578,13 @@ namespace inference float inputW = 1.f; float inputH = 1.f; - if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) { - inputW = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); - inputH = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); + if (outputMeta.GetLandmarkCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) 
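/* pixel-coordinate landmarks are decoded relative to the input tensor size and scaled back to the source image afterwards */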
{ + inputW = static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()); + inputH = static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()); } - float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 : - outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; + float thresRadius = outputMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 : + outputMeta.GetLandmarkHeatMapInfo().nmsRadius; poseDecoder.decode(inputW, inputH, thresRadius); @@ -1639,17 +1632,15 @@ namespace inference { LOGI("ENTER"); - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); auto poseResult = std::make_unique(); - if (outputMeta.IsParsed()) { - auto& landmarkInfo = outputMeta.GetLandmark(); - auto& scoreInfo = outputMeta.GetScore(); + if (mMetadata.GetOutputMeta().IsParsed()) { + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) || - !mOutputTensorBuffers.exist(scoreInfo.GetName())) { + if (!mOutputTensorBuffers.exist(outputMeta.GetLandmarkName()) || + !mOutputTensorBuffers.exist(outputMeta.GetScoreName())) { LOGE("output buffers named of %s or %s are NULL", - landmarkInfo.GetName().c_str(), scoreInfo.GetName().c_str()); + outputMeta.GetLandmarkName().c_str(), outputMeta.GetScoreName().c_str()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } @@ -1657,23 +1648,23 @@ namespace inference int heatMapHeight = 0; int heatMapChannel = 0; - if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { - heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx]; - heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx]; - heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx]; + if (outputMeta.GetLandmarkDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx]; + heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx]; + heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().cIdx]; } LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel); - std::vector channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll(); + std::vector channelIndexes = outputMeta.GetLandmarkDimInfo().GetValidIndexAll(); // If INFERENCE_LANDMARK_DECODING_TYPE_BYPASS, // the landmarkChannel is guessed from the shape of the landmark output tensor. // Otherwise, it is guessed from the heatMapChannel. (heatMapChannel is used in default). int landmarkChannel = heatMapChannel; - if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) - landmarkChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] / landmarkInfo.GetOffset(); + if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) + landmarkChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]] / outputMeta.GetLandmarkOffset(); poseResult->number_of_landmarks_per_pose = mUserListName.empty() ? 
landmarkChannel : static_cast(mUserListName.size()); @@ -1700,11 +1691,11 @@ namespace inference float inputW = 1.f; float inputH = 1.f; - float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 : - outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius; - if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) { - inputW = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()); - inputH = static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()); + float thresRadius = outputMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 : + outputMeta.GetLandmarkHeatMapInfo().nmsRadius; + if (outputMeta.GetLandmarkCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) { + inputW = static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()); + inputH = static_cast(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()); } poseDecoder.decode(inputW, inputH, thresRadius); diff --git a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp index 2831edc..4d5e36e 100755 --- a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp @@ -27,16 +27,16 @@ namespace inference { int ObjectDecoder::init() { - if (mBoxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { - if (!mTensorBuffer.exist(mMeta.GetLabel().GetName()) || - !mTensorBuffer.exist(mMeta.GetNumber().GetName()) ) { + if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { + if (!mTensorBuffer.exist(mMeta.GetLabelName()) || + !mTensorBuffer.exist(mMeta.GetNumberName()) ) { LOGE("buffer buffers named of %s or %s are NULL", - mMeta.GetLabel().GetName().c_str(), - mMeta.GetNumber().GetName().c_str()); + mMeta.GetLabelName().c_str(), mMeta.GetNumberName().c_str()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; } - std::vector indexes = mMeta.GetNumber().GetDimInfo().GetValidIndexAll(); + std::vector indexes = mMeta.GetNumberDimInfo().GetValidIndexAll(); if (indexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; @@ -45,9 +45,9 @@ namespace inference // mNumberOfObjects is set again if INFERENCE_BOX_DECODING_TYPE_BYPASS. // Otherwise it is set already within ctor. mNumberOfOjects = mTensorBuffer.getValue( - mMeta.GetNumber().GetName(), indexes[0]); + mMeta.GetNumberName(), indexes[0]); } else { - if (mBoxInfo.GetDecodeInfo().IsAnchorBoxEmpty()) { + if (mMeta.GetBoxDecodeInfo().IsAnchorBoxEmpty()) { LOGE("Anchor boxes are required but empty."); return MEDIA_VISION_ERROR_INVALID_OPERATION; } @@ -58,32 +58,32 @@ namespace inference float ObjectDecoder::decodeScore(int idx) { - float score = mTensorBuffer.getValue(mScoreInfo.GetName(), idx); - if (mScoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { + float score = mTensorBuffer.getValue(mMeta.GetScoreName(), idx); + if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) { score = PostProcess::sigmoid(score); } - return score < mScoreInfo.GetThresHold() ? 0.0f : score; + return score < mMeta.GetScoreThreshold() ? 
0.0f : score;
	}

	Box ObjectDecoder::decodeBox(int idx, float score, int label)
	{
		// assume type is (cx,cy,w,h)
		// left or cx
-		float cx = mTensorBuffer.getValue(mBoxInfo.GetName(),
-				idx * mBoxOffset + mBoxInfo.GetOrder()[0]);
+		float cx = mTensorBuffer.getValue(mMeta.GetBoxName(),
+				idx * mBoxOffset + mMeta.GetBoxOrder()[0]);
		// top or cy
-		float cy = mTensorBuffer.getValue(mBoxInfo.GetName(),
-				idx * mBoxOffset + mBoxInfo.GetOrder()[1]);
+		float cy = mTensorBuffer.getValue(mMeta.GetBoxName(),
+				idx * mBoxOffset + mMeta.GetBoxOrder()[1]);
		// right or width
-		float cWidth = mTensorBuffer.getValue(mBoxInfo.GetName(),
-				idx * mBoxOffset + mBoxInfo.GetOrder()[2]);
+		float cWidth = mTensorBuffer.getValue(mMeta.GetBoxName(),
+				idx * mBoxOffset + mMeta.GetBoxOrder()[2]);
		// bottom or height
-		float cHeight = mTensorBuffer.getValue(mBoxInfo.GetName(),
-				idx * mBoxOffset + mBoxInfo.GetOrder()[3]);
+		float cHeight = mTensorBuffer.getValue(mMeta.GetBoxName(),
+				idx * mBoxOffset + mMeta.GetBoxOrder()[3]);

		// convert type to ORIGIN_CENTER if ORIGIN_LEFTTOP
-		if (mBoxInfo.GetType() == INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP) {
+		if (mMeta.GetBoxType() == INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP) {
			float tmpCx = cx;
			float tmpCy = cy;
			cx = (cx + cWidth) * 0.5f; // (left + right)/2
@@ -93,7 +93,7 @@ namespace inference
		}

		// convert coordinate to RATIO if PIXEL
-		if (mBoxInfo.GetCoordinate() == INFERENCE_BOX_COORDINATE_TYPE_PIXEL) {
+		if (mMeta.GetBoxCoordinate() == INFERENCE_BOX_COORDINATE_TYPE_PIXEL) {
			cx /= mScaleW;
			cy /= mScaleH;
			cWidth /= mScaleW;
@@ -101,9 +101,9 @@ namespace inference
		}

		Box box = {
-			.index = mMeta.GetLabel().GetName().empty() ?
+			.index = mMeta.GetLabelName().empty() ?
					label :
-					mTensorBuffer.getValue(mMeta.GetLabel().GetName(), idx),
+					mTensorBuffer.getValue(mMeta.GetLabelName(), idx),
			.score = score,
			.location = cv::Rect2f(cx, cy, cWidth, cHeight)
		};
@@ -116,26 +116,26 @@ namespace inference
		// location coordinate of box, the output of decodeBox(), is relative between 0 ~ 1
		Box box = decodeBox(anchorIdx, score, idx);

-		if (mBoxInfo.GetDecodeInfo().IsFixedAnchorSize()) {
+		if (mMeta.GetBoxDecodeInfo().IsFixedAnchorSize()) {
			box.location.x += anchor.x;
			box.location.y += anchor.y;
		} else {
-			box.location.x = box.location.x / mBoxInfo.GetDecodeInfo().GetAnchorXscale() *
+			box.location.x = box.location.x / mMeta.GetBoxDecodeInfo().GetAnchorXscale() *
				anchor.width + anchor.x;
-			box.location.y = box.location.y / mBoxInfo.GetDecodeInfo().GetAnchorYscale() *
+			box.location.y = box.location.y / mMeta.GetBoxDecodeInfo().GetAnchorYscale() *
				anchor.height + anchor.y;
		}

-		if (mBoxInfo.GetDecodeInfo().IsExponentialBoxScale()) {
+		if (mMeta.GetBoxDecodeInfo().IsExponentialBoxScale()) {
			box.location.width = anchor.width *
-				std::exp(box.location.width/mBoxInfo.GetDecodeInfo().GetAnchorWscale());
+				std::exp(box.location.width / mMeta.GetBoxDecodeInfo().GetAnchorWscale());
			box.location.height = anchor.height *
-				std::exp(box.location.height/mBoxInfo.GetDecodeInfo().GetAnchorHscale());
+				std::exp(box.location.height / mMeta.GetBoxDecodeInfo().GetAnchorHscale());
		} else {
			box.location.width = anchor.width *
-				box.location.width/mBoxInfo.GetDecodeInfo().GetAnchorWscale();
+				box.location.width / mMeta.GetBoxDecodeInfo().GetAnchorWscale();
			box.location.height = anchor.height *
-				box.location.height/mBoxInfo.GetDecodeInfo().GetAnchorHscale();
+				box.location.height / mMeta.GetBoxDecodeInfo().GetAnchorHscale();
		}

		return box;
@@ -147,7 +147,7 @@ namespace inference
		int ret = MEDIA_VISION_ERROR_NONE;

		for (int idx = 0;
idx < mNumberOfOjects; ++idx) { - if (mBoxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { + if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { float score = decodeScore(idx); if (score <= 0.0f) continue; @@ -158,7 +158,7 @@ namespace inference int anchorIdx = -1; Boxes boxes; - for (auto& anchorBox : mBoxInfo.GetDecodeInfo().GetAnchorBoxAll()) { + for (auto& anchorBox : mMeta.GetBoxDecodeInfo().GetAnchorBoxAll()) { anchorIdx++; float score = decodeScore(anchorIdx * mNumberOfOjects + idx); @@ -176,8 +176,8 @@ namespace inference if (!boxList.empty()) { PostProcess postProc; ret = postProc.Nms(boxList, - mBoxInfo.GetDecodeInfo().GetNmsMode(), - mBoxInfo.GetDecodeInfo().GetNmsIouThreshold(), + mMeta.GetBoxDecodeInfo().GetNmsMode(), + mMeta.GetBoxDecodeInfo().GetNmsIouThreshold(), mResultBoxes); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to non-maximum suppression[%d]", ret); diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index 8dec322..176b0eb 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -35,6 +35,7 @@ namespace inference score(), box(), label(), + number(), landmark(), offsetVec(), dispVecs(), @@ -47,8 +48,8 @@ namespace inference name(), dimInfo(), threshold(0.0), - type(INFERENCE_SCORE_TYPE_NORMAL), topNumber(1), + type(INFERENCE_SCORE_TYPE_NORMAL), deQuantization(nullptr) { // Score type @@ -195,22 +196,22 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - void BoxInfo::DecodeInfo::AddAnchorBox(cv::Rect2f& anchor) + void DecodeInfo::AddAnchorBox(cv::Rect2f& anchor) { anchorBoxes.push_back(anchor); } - void BoxInfo::DecodeInfo::ClearAnchorBox() + void DecodeInfo::ClearAnchorBox() { anchorBoxes.clear(); } - std::vector& BoxInfo::DecodeInfo::GetAnchorBoxAll() + std::vector& DecodeInfo::GetAnchorBoxAll() { return anchorBoxes; } - bool BoxInfo::DecodeInfo::IsAnchorBoxEmpty() + bool DecodeInfo::IsAnchorBoxEmpty() { return anchorBoxes.empty(); } @@ -374,7 +375,7 @@ namespace inference } - int BoxInfo::DecodeInfo::ParseAnchorParam(JsonObject *root) + int DecodeInfo::ParseAnchorParam(JsonObject *root) { JsonObject *object = json_object_get_object_member(root, "anchor") ; @@ -420,45 +421,43 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - float BoxInfo::DecodeInfo::CalculateScale(float min, float max, int index, int maxStride) + float DecodeInfo::CalculateScale(float min, float max, int index, int maxStride) { return min + (max - min) * 1.0 * index / (maxStride - 1.0f); } - bool BoxInfo::DecodeInfo::IsFixedAnchorSize() + bool DecodeInfo::IsFixedAnchorSize() { return this->anchorParam.isFixedAnchorSize;; } - bool BoxInfo::DecodeInfo::IsExponentialBoxScale() + bool DecodeInfo::IsExponentialBoxScale() { return this->anchorParam.isExponentialBoxScale; } - float BoxInfo::DecodeInfo::GetAnchorXscale() + float DecodeInfo::GetAnchorXscale() { return this->anchorParam.xScale; } - float BoxInfo::DecodeInfo::GetAnchorYscale() + float DecodeInfo::GetAnchorYscale() { return this->anchorParam.yScale; } - float BoxInfo::DecodeInfo::GetAnchorWscale() + float DecodeInfo::GetAnchorWscale() { return this->anchorParam.wScale; } - float BoxInfo::DecodeInfo::GetAnchorHscale() + float DecodeInfo::GetAnchorHscale() { return this->anchorParam.hScale; } - int BoxInfo::DecodeInfo::GenerateAnchor() + int DecodeInfo::GenerateAnchor() { - //BoxInfo::DecodeInfo& 
decodeInfo = box.GetDecodeInfo(); - if (this->anchorParam.strides.empty() || this->anchorParam.aspectRatios.empty()) { LOGE("Invalid anchor parameters"); @@ -552,7 +551,7 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int BoxInfo::DecodeInfo::ParseNms(JsonObject *root) + int DecodeInfo::ParseNms(JsonObject *root) { if (!json_object_has_member(root, "nms")) { LOGI("nms is empty. skip it"); @@ -572,17 +571,17 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int BoxInfo::DecodeInfo::GetNmsMode() + int DecodeInfo::GetNmsMode() { return this->nmsParam.mode; } - float BoxInfo::DecodeInfo::GetNmsIouThreshold() + float DecodeInfo::GetNmsIouThreshold() { return this->nmsParam.iouThreshold; } - int BoxInfo::DecodeInfo::ParseRotate(JsonObject *root) + int DecodeInfo::ParseRotate(JsonObject *root) { if (!json_object_has_member(root, "rotate")) { LOGI("rotate is empty. skip it"); @@ -597,62 +596,62 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int BoxInfo::DecodeInfo::GetRotStartPointIndex() + int DecodeInfo::GetRotStartPointIndex() { return this->rotParam.startPointIndex; } - int BoxInfo::DecodeInfo::GetRotEndPointIndex() + int DecodeInfo::GetRotEndPointIndex() { return this->rotParam.endPointIndex; } - float BoxInfo::DecodeInfo::GetBaseAngle() + float DecodeInfo::GetBaseAngle() { return this->rotParam.baseAngle; } - int BoxInfo::DecodeInfo::GetRoiMode() + int DecodeInfo::GetRoiMode() { return this->roiOptParam.mode; } - int BoxInfo::DecodeInfo::GetRoiStartPointIndex() + int DecodeInfo::GetRoiStartPointIndex() { return this->roiOptParam.startPointIndex; } - int BoxInfo::DecodeInfo::GetRoiEndPointIndex() + int DecodeInfo::GetRoiEndPointIndex() { return this->roiOptParam.endPointIndex; } - int BoxInfo::DecodeInfo::GetRoiCenterPointIndex() + int DecodeInfo::GetRoiCenterPointIndex() { return this->roiOptParam.centerPointIndex; } - float BoxInfo::DecodeInfo::GetShiftX() + float DecodeInfo::GetShiftX() { return this->roiOptParam.shiftX; } - float BoxInfo::DecodeInfo::GetShiftY() + float DecodeInfo::GetShiftY() { return this->roiOptParam.shiftY; } - float BoxInfo::DecodeInfo::GetScaleX() + float DecodeInfo::GetScaleX() { return this->roiOptParam.scaleX; } - float BoxInfo::DecodeInfo::GetScaleY() + float DecodeInfo::GetScaleY() { return this->roiOptParam.scaleY; } - int BoxInfo::DecodeInfo::ParseRoiOption(JsonObject *root) + int DecodeInfo::ParseRoiOption(JsonObject *root) { if (!json_object_has_member(root, "roi")) { LOGI("roi is empty. 
skip it"); @@ -672,56 +671,6 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - ScoreInfo& OutputMetadata::GetScore() - { - return score; - } - - BoxInfo& OutputMetadata::GetBox() - { - return box; - } - - Label& OutputMetadata::GetLabel() - { - return label; - } - - Number& OutputMetadata::GetNumber() - { - return number; - } - - Landmark& OutputMetadata::GetLandmark() - { - return landmark; - } - - OffsetVec& OutputMetadata::GetOffset() - { - return offsetVec; - } - - std::vector& OutputMetadata::GetDispVecAll() - { - return dispVecs; - } - - Edge& OutputMetadata::GetEdge() - { - return edgeMap; - } - - std::vector>& Edge::GetEdgesAll() - { - return edges; - } - - bool OutputMetadata::IsParsed() - { - return parsed; - } - Landmark::Landmark() : name(), dimInfo(), @@ -729,7 +678,7 @@ namespace inference offset(), coordinate(INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO), decodingType(INFERENCE_LANDMARK_DECODING_TYPE_BYPASS), - decodingInfo() + heatMapInfo() { supportedLandmarkTypes.insert({"2D_SINGLE", INFERENCE_LANDMARK_TYPE_2D_SINGLE}); @@ -805,9 +754,9 @@ namespace inference return decodingType; } - Landmark::DecodeInfo& Landmark::GetDecodingInfo() + HeatMapInfo& Landmark::GetHeatMapInfo() { - return decodingInfo; + return heatMapInfo; } int OutputMetadata::ParseLandmark(JsonObject *root) @@ -863,26 +812,26 @@ namespace inference JsonObject *object = json_object_get_object_member(cObject, "heatmap") ; try { - landmark.GetDecodingInfo().heatMap.shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes); + landmark.GetHeatMapInfo().shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes); } catch (const std::exception& e) { LOGE("Invalid %s", e.what()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } std::vector heatMapIndexes = landmark.GetDimInfo().GetValidIndexAll(); - if (landmark.GetDecodingInfo().heatMap.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { - landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[0]; - landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[1]; - landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[2]; + if (landmark.GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { + landmark.GetHeatMapInfo().cIdx = heatMapIndexes[0]; + landmark.GetHeatMapInfo().hIdx = heatMapIndexes[1]; + landmark.GetHeatMapInfo().wIdx = heatMapIndexes[2]; } else { - landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[0]; - landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[1]; - landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[2]; + landmark.GetHeatMapInfo().hIdx = heatMapIndexes[0]; + landmark.GetHeatMapInfo().wIdx = heatMapIndexes[1]; + landmark.GetHeatMapInfo().cIdx = heatMapIndexes[2]; } if (json_object_has_member(object, "nms_radius")) { - landmark.GetDecodingInfo().heatMap.nmsRadius = static_cast(json_object_get_double_member(object, "nms_radius")); - LOGI("nms is enabled with %3.f", landmark.GetDecodingInfo().heatMap.nmsRadius ); + landmark.GetHeatMapInfo().nmsRadius = static_cast(json_object_get_double_member(object, "nms_radius")); + LOGI("nms is enabled with %3.f", landmark.GetHeatMapInfo().nmsRadius ); } } diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index 1ae33a7..a1efd2d 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -50,15 +50,13 @@ namespace inference { LOGI("ENTER"); - Landmark& 
landmarkInfo = mMeta.GetLandmark(); - - if (landmarkInfo.GetType() < INFERENCE_LANDMARK_TYPE_2D_SINGLE || - landmarkInfo.GetType() > INFERENCE_LANDMARK_TYPE_3D_SINGLE) { + if (mMeta.GetLandmarkType() < INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType() > INFERENCE_LANDMARK_TYPE_3D_SINGLE) { LOGE("Not supported landmark type"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { LOGI("Skip init"); return MEDIA_VISION_ERROR_NONE; } @@ -68,12 +66,11 @@ namespace inference float score, localScore; int idx; bool isLocalMax; - ScoreInfo& scoreInfo = mMeta.GetScore(); mCandidates.clear(); - if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || - landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { + if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { mCandidates.resize(mHeatMapChannel); } @@ -83,16 +80,16 @@ namespace inference for (c = 0; c < mHeatMapChannel; ++c, candidate++) { isLocalMax = true; idx = convertXYZtoX(x, y, c); - score = mTensorBuffer.getValue(scoreInfo.GetName(), idx); - if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { + score = mTensorBuffer.getValue(mMeta.GetScoreName(), idx); + if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) { score = PostProcess::sigmoid(score); } - if (score < scoreInfo.GetThresHold()) + if (score < mMeta.GetScoreThreshold()) continue; - if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || - landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { + if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { if (score <= candidate->score) continue; @@ -110,8 +107,8 @@ namespace inference for (dy = sy; dy < ey; ++dy) { for (dx = sx; dx < ex; ++dx) { idx = convertXYZtoX(dx, dy, c); - localScore = mTensorBuffer.getValue(scoreInfo.GetName(), idx); - if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { + localScore = mTensorBuffer.getValue(mMeta.GetScoreName(), idx); + if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) { localScore = PostProcess::sigmoid(localScore); } if (localScore > score) { @@ -159,7 +156,7 @@ namespace inference int PoseDecoder::getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal) { - if (!mTensorBuffer.exist(mMeta.GetOffset().GetName())) { + if (!mTensorBuffer.exist(mMeta.GetOffsetVecName())) { offsetVal.x = offsetVal.y = 0.f; LOGI("No offset value"); LOGI("LEAVE"); @@ -169,11 +166,11 @@ namespace inference cv::Point idx = convertXYZtoXY(landmark.heatMapLoc.x, landmark.heatMapLoc.y, landmark.id); try { - offsetVal.x = mTensorBuffer.getValue(mMeta.GetOffset().GetName(), idx.x); - offsetVal.y = mTensorBuffer.getValue(mMeta.GetOffset().GetName(), idx.y); + offsetVal.x = mTensorBuffer.getValue(mMeta.GetOffsetVecName(), idx.x); + offsetVal.y = mTensorBuffer.getValue(mMeta.GetOffsetVecName(), idx.y); } catch (const std::exception& e) { LOGE("Fail to get value at (%d, %d) from %s", - idx.x, idx.y, mMeta.GetOffset().GetName().c_str()); + idx.x, idx.y, mMeta.GetOffsetVecName().c_str()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } @@ -241,30 +238,28 @@ namespace inference } mPoseLandmarks.clear(); - LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false}; - Landmark& landmarkInfo = mMeta.GetLandmark(); - 
ScoreInfo& scoreInfo = mMeta.GetScore(); + LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false}; - if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || - landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { + if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { mPoseLandmarks.resize(1); - if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks); } else { mPoseLandmarks[0].landmarks.resize(mHeatMapChannel); } } - if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { while (!mCandidates.empty()) { LandmarkPoint &root = mCandidates.front(); getIndexToPos(root, scaleWidth, scaleHeight); - if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE) { + if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE) { root.valid = true; mPoseLandmarks[0].landmarks[root.id] = root; mPoseLandmarks[0].score += root.score; @@ -318,27 +313,27 @@ namespace inference } } else { // multi pose is not supported - std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); - float poseScore = scoreInfo.GetThresHold(); + std::vector scoreIndexes = mMeta.GetScoreDimInfo().GetValidIndexAll(); + float poseScore = mMeta.GetScoreThreshold(); if (!scoreIndexes.empty()) { - poseScore = mTensorBuffer.getValue(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]); - if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) { + poseScore = mTensorBuffer.getValue(mMeta.GetScoreName(), scoreIndexes[scoreIndexes[0]]); + if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) { poseScore = PostProcess::sigmoid(poseScore); } - if (poseScore < scoreInfo.GetThresHold()) { - LOGI("pose score %.4f is lower than %.4f\n[LEAVE]", poseScore, scoreInfo.GetThresHold()); + if (poseScore < mMeta.GetScoreThreshold()) { + LOGI("pose score %.4f is lower than %.4f\n[LEAVE]", poseScore, mMeta.GetScoreThreshold()); return MEDIA_VISION_ERROR_NONE; } } - int landmarkOffset = (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || - landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 2 : 3; - if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { - landmarkOffset = landmarkInfo.GetOffset(); + int landmarkOffset = (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE || + mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 
2 : 3;
+		if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+			landmarkOffset = mMeta.GetLandmarkOffset();
		}

		for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
-			float px = mTensorBuffer.getValue(landmarkInfo.GetName(), idx * landmarkOffset);
-			float py = mTensorBuffer.getValue(landmarkInfo.GetName(), idx * landmarkOffset + 1);
+			float px = mTensorBuffer.getValue(mMeta.GetLandmarkName(), idx * landmarkOffset);
+			float py = mTensorBuffer.getValue(mMeta.GetLandmarkName(), idx * landmarkOffset + 1);

			mPoseLandmarks[0].landmarks[idx].score = poseScore;
			mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
@@ -371,9 +366,9 @@ namespace inference
		LOGI("KeyId: [%d], heatMap: %d, %d", root.id, root.heatMapLoc.x, root.heatMapLoc.y);
		LOGI("KeyId: [%d], decoded: %.4f, %.4f, score %.3f", root.id, root.decodedLoc.x, root.decodedLoc.y, root.score);

-		int index = static_cast(mMeta.GetEdge().GetEdgesAll().size()) - 1;
-		for (auto riter = mMeta.GetEdge().GetEdgesAll().rbegin();
-			riter != mMeta.GetEdge().GetEdgesAll().rend(); ++riter) {
+		int index = static_cast(mMeta.GetEdges().size()) - 1;
+		for (auto riter = mMeta.GetEdges().rbegin();
+			riter != mMeta.GetEdges().rend(); ++riter) {
			int fromKeyId = riter->second;
			int toKeyId = riter->first;
@@ -392,8 +387,8 @@
		}

		index = 0;
-		for (auto iter = mMeta.GetEdge().GetEdgesAll().begin();
-			iter != mMeta.GetEdge().GetEdgesAll().end(); ++iter) {
+		for (auto iter = mMeta.GetEdges().begin();
+			iter != mMeta.GetEdges().end(); ++iter) {
			int fromKeyId = iter->first;
			int toKeyId = iter->second;
@@ -449,8 +444,8 @@
		}

		int idx = convertXYZtoX(toLandmark.heatMapLoc.x, toLandmark.heatMapLoc.y, toLandmark.id);
-		toLandmark.score = mTensorBuffer.getValue(mMeta.GetScore().GetName(), idx);
-		if (mMeta.GetScore().GetType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+		toLandmark.score = mTensorBuffer.getValue(mMeta.GetScoreName(), idx);
+		if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) {
			toLandmark.score = PostProcess::sigmoid(toLandmark.score);
		}
@@ -464,12 +459,14 @@
	{
		LOGI("ENTER");

-		LOGI("edge size: %zd", mMeta.GetEdge().GetEdgesAll().size());
+		LOGI("edge size: %zd", mMeta.GetEdges().size());
+
		int idxY = index.y * mHeatMapWidth
-			* static_cast(mMeta.GetEdge().GetEdgesAll().size()) * 2;
-		idxY += index.x * static_cast(mMeta.GetEdge().GetEdgesAll().size()) * 2 + edgeId;
+			* static_cast(mMeta.GetEdges().size()) * 2;
+
+		idxY += index.x * static_cast(mMeta.GetEdges().size()) * 2 + edgeId;

-		int idxX = idxY + static_cast(mMeta.GetEdge().GetEdgesAll().size());
+		int idxX = idxY + static_cast(mMeta.GetEdges().size());

		for(auto& dispVec : mMeta.GetDispVecAll()){
			if (dispVec.GetType() == type) { // 0: forward
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec
index ee7e44e..205f377 100644
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -1,6 +1,6 @@
 Name: capi-media-vision
 Summary: Media Vision library for Tizen Native API
-Version: 0.8.17
+Version: 0.8.18
 Release: 1
 Group: Multimedia/Framework
 License: Apache-2.0 and BSD-3-Clause
--
2.7.4


From 20caea0dc7ad4eb0297da672601b13452717d498 Mon Sep 17 00:00:00 2001
From: Inki Dae
Date: Wed, 13 Oct 2021 18:55:41 +0900
Subject: [PATCH 04/16] mv_machine_learning: code cleanup to Inference class

Cleaned up the Inference class. What this patch does:
- code sliding.
- change variable names to meaningful ones, such as 'n' to 'output_idx'.
- drop unnecessary variables.
- use same parameter name for Get*Results functions. Change-Id: I47ac3eb241116174e4a6a7bc2a1b90ab9378de25 Signed-off-by: Inki Dae --- .../mv_inference/inference/include/Inference.h | 6 +- .../mv_inference/inference/src/Inference.cpp | 138 +++++++++------------ .../inference/src/mv_inference_open.cpp | 26 ++-- 3 files changed, 75 insertions(+), 95 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/include/Inference.h b/mv_machine_learning/mv_inference/inference/include/Inference.h index a0506f2..95f285f 100644 --- a/mv_machine_learning/mv_inference/inference/include/Inference.h +++ b/mv_machine_learning/mv_inference/inference/include/Inference.h @@ -293,7 +293,7 @@ namespace inference * @since_tizen 5.5 * @return @c true on success, otherwise a negative error value */ - int GetClassficationResults(ImageClassificationResults &classificationResults); + int GetClassficationResults(ImageClassificationResults *results); /** * @brief Gets the ObjectDetectioResults @@ -301,7 +301,7 @@ namespace inference * @since_tizen 5.5 * @return @c true on success, otherwise a negative error value */ - int GetObjectDetectionResults(ObjectDetectionResults *detectionResults); + int GetObjectDetectionResults(ObjectDetectionResults *results); /** * @brief Gets the FaceDetectioResults @@ -309,7 +309,7 @@ namespace inference * @since_tizen 5.5 * @return @c true on success, otherwise a negative error value */ - int GetFaceDetectionResults(FaceDetectionResults *detectionResults); + int GetFaceDetectionResults(FaceDetectionResults *results); /** * @brief Gets the FacialLandmarkDetectionResults diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 05c9c07..d79b3ff 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1091,14 +1091,12 @@ namespace inference return mSupportedInferenceBackend[backend]; } - int Inference::GetClassficationResults(ImageClassificationResults &results) + int Inference::GetClassficationResults(ImageClassificationResults *results) { // Will contain top N results in ascending order. 
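	// As a side note, the same top-N selection idea can be written compactly
	// with std::partial_sort. A minimal, self-contained sketch, assuming a
	// plain float array `scores` of length `count` and hypothetical
	// `threshold` and `topN` values (these names are illustrative, not taken
	// from this codebase):
	//
	//   #include <algorithm>
	//   #include <functional>
	//   #include <utility>
	//   #include <vector>
	//
	//   std::vector<std::pair<float, int>> top;  // (confidence, class index)
	//   for (int i = 0; i < count; ++i)
	//       if (scores[i] >= threshold)
	//           top.emplace_back(scores[i], i);
	//   const size_t n = std::min<size_t>(topN, top.size());
	//   std::partial_sort(top.begin(), top.begin() + n, top.end(),
	//                     std::greater<>());  // descending by confidence
	//   top.resize(n);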
std::vector> topScore; auto threadHold = mConfig.mConfidenceThresHold; - results.number_of_classes = 0; - if (mMetadata.GetOutputMeta().IsParsed()) { OutputMetadata outputMetadata = mMetadata.GetOutputMeta(); std::vector indexes = outputMetadata.GetScoreDimInfo().GetValidIndexAll(); @@ -1191,23 +1189,25 @@ namespace inference std::reverse(topScore.begin(), topScore.end()); } + results->number_of_classes = 0; + for (auto& score : topScore) { LOGI("score: %.3f, threshold: %.3f", score.first, threadHold); LOGI("idx:%d", score.second); LOGI("classProb: %.3f", score.first); - results.indices.push_back(score.second); - results.confidences.push_back(score.first); - results.names.push_back(mUserListName[score.second]); - results.number_of_classes++; + results->indices.push_back(score.second); + results->confidences.push_back(score.first); + results->names.push_back(mUserListName[score.second]); + results->number_of_classes++; } - LOGE("Inference: GetClassificationResults: %d\n", results.number_of_classes); + LOGE("Inference: GetClassificationResults: %d\n", results->number_of_classes); return MEDIA_VISION_ERROR_NONE; } int Inference::GetObjectDetectionResults( - ObjectDetectionResults *detectionResults) + ObjectDetectionResults *results) { if (mMetadata.GetOutputMeta().IsParsed()) { OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); @@ -1222,6 +1222,7 @@ namespace inference int boxOffset = 0; int numberOfObjects = 0; + if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { std::vector boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); if (boxIndexes.size() != 1) { @@ -1252,23 +1253,21 @@ namespace inference objDecoder.init(); objDecoder.decode(); - ObjectDetectionResults results; - results.number_of_objects = 0; + results->number_of_objects = 0; for (auto& box : objDecoder.getObjectAll()) { - results.indices.push_back(box.index); - results.names.push_back(mUserListName[box.index]); - results.confidences.push_back(box.score); - results.locations.push_back(cv::Rect( + results->indices.push_back(box.index); + results->names.push_back(mUserListName[box.index]); + results->confidences.push_back(box.score); + results->locations.push_back(cv::Rect( static_cast((box.location.x - box.location.width * 0.5f) * static_cast(mSourceSize.width)), static_cast((box.location.y - box.location.height * 0.5f) * static_cast(mSourceSize.height)), static_cast(box.location.width * static_cast(mSourceSize.width)), static_cast(box.location.height * static_cast(mSourceSize.height)))); - results.number_of_objects++; + results->number_of_objects++; } - *detectionResults = results; - LOGI("Inference: GetObjectDetectionResults: %d\n", - results.number_of_objects); + + LOGI("Inference: GetObjectDetectionResults: %d\n", results->number_of_objects); } else { tensor_t outputData; @@ -1288,7 +1287,6 @@ namespace inference float *scores = nullptr; int number_of_detections = 0; - cv::Mat cvScores, cvClasses, cvBoxes; if (outputData.dimInfo.size() == 1) { // there is no way to know how many objects are detect unless the number of objects aren't // provided. In the case, each backend should provide the number of results manually. 
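	// For reference, this single-tensor legacy path assumes OpenCV-DNN-style
	// SSD output: one row of seven floats per detection. A sketch of walking
	// that layout, assuming (from the column indexes used below) the row
	// order [image_id, class_id, score, left, top, right, bottom]:
	//
	//   #include <vector>
	//   #include <opencv2/core.hpp>
	//
	//   struct Detection { int classId; float score; cv::Rect2f box; };
	//
	//   std::vector<Detection> parseRows(const float *rows, int count)
	//   {
	//       std::vector<Detection> out;
	//       for (int i = 0; i < count; ++i) {
	//           const float *r = rows + i * 7;  // one 7-float row per detection
	//           out.push_back({ static_cast<int>(r[1]), r[2],
	//                           cv::Rect2f(r[3], r[4], r[5] - r[3], r[6] - r[4]) });
	//       }
	//       return out;
	//   }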
@@ -1307,8 +1305,9 @@ namespace inference cv::Mat cvTop = cvOutputData.col(4).clone(); cv::Mat cvRight = cvOutputData.col(5).clone(); cv::Mat cvBottom = cvOutputData.col(6).clone(); - + cv::Mat cvScores, cvClasses, cvBoxes; cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight }; + cv::hconcat(cvBoxElems, 4, cvBoxes); // classes @@ -1320,42 +1319,38 @@ namespace inference boxes = cvBoxes.ptr(0); classes = cvClasses.ptr(0); scores = cvScores.ptr(0); - } else { boxes = reinterpret_cast(outputData.data[0]); classes = reinterpret_cast(outputData.data[1]); scores = reinterpret_cast(outputData.data[2]); - number_of_detections = - (int) (*reinterpret_cast(outputData.data[3])); + number_of_detections = (int) (*reinterpret_cast(outputData.data[3])); } LOGI("number_of_detections = %d", number_of_detections); - int left, top, right, bottom; - cv::Rect loc; + results->number_of_objects = 0; - ObjectDetectionResults results; - results.number_of_objects = 0; for (int idx = 0; idx < number_of_detections; ++idx) { if (scores[idx] < mThreshold) continue; - left = static_cast(boxes[idx * 4 + 1] * mSourceSize.width); - top = static_cast(boxes[idx * 4 + 0] * mSourceSize.height); - right = static_cast(boxes[idx * 4 + 3] * mSourceSize.width); - bottom = static_cast(boxes[idx * 4 + 2] * mSourceSize.height); + int left = static_cast(boxes[idx * 4 + 1] * mSourceSize.width); + int top = static_cast(boxes[idx * 4 + 0] * mSourceSize.height); + int right = static_cast(boxes[idx * 4 + 3] * mSourceSize.width); + int bottom = static_cast(boxes[idx * 4 + 2] * mSourceSize.height); + cv::Rect loc; loc.x = left; loc.y = top; loc.width = right - left + 1; loc.height = bottom - top + 1; - results.indices.push_back(static_cast(classes[idx])); - results.confidences.push_back(scores[idx]); - results.names.push_back( + results->indices.push_back(static_cast(classes[idx])); + results->confidences.push_back(scores[idx]); + results->names.push_back( mUserListName[static_cast(classes[idx])]); - results.locations.push_back(loc); - results.number_of_objects++; + results->locations.push_back(loc); + results->number_of_objects++; LOGI("objectClass: %d", static_cast(classes[idx])); LOGI("confidence:%f", scores[idx]); @@ -1363,16 +1358,13 @@ namespace inference bottom); } - *detectionResults = results; - LOGI("Inference: GetObjectDetectionResults: %d\n", - results.number_of_objects); + LOGI("Inference: GetObjectDetectionResults: %d\n", results->number_of_objects); } return MEDIA_VISION_ERROR_NONE; } - int - Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults) + int Inference::GetFaceDetectionResults(FaceDetectionResults *results) { if (mMetadata.GetOutputMeta().IsParsed()) { OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); @@ -1387,6 +1379,7 @@ namespace inference int boxOffset = 0; int numberOfFaces = 0; + if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { std::vector boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll(); if (boxIndexes.size() != 1) { @@ -1417,21 +1410,19 @@ namespace inference objDecoder.init(); objDecoder.decode(); - FaceDetectionResults results; - results.number_of_faces = 0; + results->number_of_faces = 0; for (auto& face : objDecoder.getObjectAll()) { - results.confidences.push_back(face.score); - results.locations.push_back(cv::Rect( + results->confidences.push_back(face.score); + results->locations.push_back(cv::Rect( static_cast((face.location.x - face.location.width * 0.5f) * static_cast(mSourceSize.width)), static_cast((face.location.y - 
face.location.height * 0.5f) * static_cast(mSourceSize.height)), static_cast(face.location.width * static_cast(mSourceSize.width)), static_cast(face.location.height * static_cast(mSourceSize.height)))); - results.number_of_faces++; + results->number_of_faces++; } - *detectionResults = results; - LOGE("Inference: GetFaceDetectionResults: %d\n", - results.number_of_faces); + + LOGE("Inference: GetFaceDetectionResults: %d\n", results->number_of_faces); } else { tensor_t outputData; @@ -1450,8 +1441,8 @@ namespace inference float *classes = nullptr; float *scores = nullptr; int number_of_detections = 0; - cv::Mat cvScores, cvClasses, cvBoxes; + if (outputData.dimInfo.size() == 1) { // there is no way to know how many objects are detect unless the number of objects aren't // provided. In the case, each backend should provide the number of results manually. @@ -1460,17 +1451,14 @@ namespace inference // indicates the image id. But it is useless if a batch mode isn't supported. // So, use the 1st of 7. - number_of_detections = static_cast( - *reinterpret_cast(outputData.data[0])); - cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3], - CV_32F, outputData.data[0]); + number_of_detections = static_cast(*reinterpret_cast(outputData.data[0])); + cv::Mat cvOutputData(number_of_detections, outputData.dimInfo[0][3], CV_32F, outputData.data[0]); // boxes cv::Mat cvLeft = cvOutputData.col(3).clone(); cv::Mat cvTop = cvOutputData.col(4).clone(); cv::Mat cvRight = cvOutputData.col(5).clone(); cv::Mat cvBottom = cvOutputData.col(6).clone(); - cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight }; cv::hconcat(cvBoxElems, 4, cvBoxes); @@ -1483,49 +1471,41 @@ namespace inference boxes = cvBoxes.ptr(0); classes = cvClasses.ptr(0); scores = cvScores.ptr(0); - } else { boxes = reinterpret_cast(outputData.data[0]); classes = reinterpret_cast(outputData.data[1]); scores = reinterpret_cast(outputData.data[2]); - number_of_detections = static_cast( - *reinterpret_cast(outputData.data[3])); + number_of_detections = static_cast(*reinterpret_cast(outputData.data[3])); } - int left, top, right, bottom; - cv::Rect loc; + results->number_of_faces = 0; - FaceDetectionResults results; - results.number_of_faces = 0; for (int idx = 0; idx < number_of_detections; ++idx) { if (scores[idx] < mThreshold) continue; - left = static_cast(boxes[idx * 4 + 1] * mSourceSize.width); - top = static_cast(boxes[idx * 4 + 0] * mSourceSize.height); - right = static_cast(boxes[idx * 4 + 3] * mSourceSize.width); - bottom = static_cast(boxes[idx * 4 + 2] * mSourceSize.height); + int left = static_cast(boxes[idx * 4 + 1] * mSourceSize.width); + int top = static_cast(boxes[idx * 4 + 0] * mSourceSize.height); + int right = static_cast(boxes[idx * 4 + 3] * mSourceSize.width); + int bottom = static_cast(boxes[idx * 4 + 2] * mSourceSize.height); + cv::Rect loc; loc.x = left; loc.y = top; loc.width = right - left + 1; loc.height = bottom - top + 1; - - results.confidences.push_back(scores[idx]); - results.locations.push_back(loc); - results.number_of_faces++; + results->confidences.push_back(scores[idx]); + results->locations.push_back(loc); + results->number_of_faces++; LOGI("confidence:%f", scores[idx]); LOGI("class: %f", classes[idx]); LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx * 4 + 1], boxes[idx * 4 + 0], boxes[idx * 4 + 3], boxes[idx * 4 + 2]); - LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, - bottom); + LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom); } - *detectionResults 
= results; - LOGE("Inference: GetFaceDetectionResults: %d\n", - results.number_of_faces); + LOGE("Inference: GetFaceDetectionResults: %d\n", results->number_of_faces); } return MEDIA_VISION_ERROR_NONE; @@ -1699,12 +1679,11 @@ namespace inference } poseDecoder.decode(inputW, inputH, thresRadius); - - int part = 0; poseResult->number_of_poses = poseDecoder.getNumberOfPose(); + for (int poseIndex = 0; poseIndex < poseResult->number_of_poses; ++poseIndex) { for (int landmarkIndex = 0; landmarkIndex < poseResult->number_of_landmarks_per_pose; ++ landmarkIndex) { - part = landmarkIndex; + int part = landmarkIndex; if (!mUserListName.empty()) { part = std::stoi(mUserListName[landmarkIndex]) - 1; if (part < 0) { @@ -1773,6 +1752,7 @@ namespace inference loc2f.x = (static_cast(loc.x) / ratioX); loc2f.y = (static_cast(loc.y) / ratioY); + LOGI("landmarkIndex[%2d] - mapping to [%2d]: x[%.3f], y[%.3f], score[%.3f]", landmarkIndex, part, loc2f.x, loc2f.y, score); diff --git a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp index c945ac4..a9fd490 100644 --- a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp +++ b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp @@ -638,7 +638,7 @@ int mv_inference_image_classify_open( ImageClassificationResults classificationResults; - ret = pInfer->GetClassficationResults(classificationResults); + ret = pInfer->GetClassficationResults(&classificationResults); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to get inference results"); return ret; @@ -699,14 +699,14 @@ int mv_inference_object_detect_open(mv_source_h source, mv_inference_h infer, locations.resize(numberOfOutputs); } - for (int n = 0; n < numberOfOutputs; ++n) { - LOGE("names: %s", objectDetectionResults.names[n].c_str()); - names[n] = objectDetectionResults.names[n].c_str(); + for (int output_idx = 0; output_idx < numberOfOutputs; ++output_idx) { + LOGE("names: %s", objectDetectionResults.names[output_idx].c_str()); + names[output_idx] = objectDetectionResults.names[output_idx].c_str(); - locations[n].point.x = objectDetectionResults.locations[n].x; - locations[n].point.y = objectDetectionResults.locations[n].y; - locations[n].width = objectDetectionResults.locations[n].width; - locations[n].height = objectDetectionResults.locations[n].height; + locations[output_idx].point.x = objectDetectionResults.locations[output_idx].x; + locations[output_idx].point.y = objectDetectionResults.locations[output_idx].y; + locations[output_idx].width = objectDetectionResults.locations[output_idx].width; + locations[output_idx].height = objectDetectionResults.locations[output_idx].height; } int *indices = objectDetectionResults.indices.data(); @@ -745,11 +745,11 @@ int mv_inference_face_detect_open(mv_source_h source, mv_inference_h infer, int numberOfOutputs = faceDetectionResults.number_of_faces; std::vector locations(numberOfOutputs); - for (int n = 0; n < numberOfOutputs; ++n) { - locations[n].point.x = faceDetectionResults.locations[n].x; - locations[n].point.y = faceDetectionResults.locations[n].y; - locations[n].width = faceDetectionResults.locations[n].width; - locations[n].height = faceDetectionResults.locations[n].height; + for (int output_idx = 0; output_idx < numberOfOutputs; ++output_idx) { + locations[output_idx].point.x = faceDetectionResults.locations[output_idx].x; + locations[output_idx].point.y = faceDetectionResults.locations[output_idx].y; + locations[output_idx].width = 
faceDetectionResults.locations[output_idx].width;
+		locations[output_idx].height = faceDetectionResults.locations[output_idx].height;
	}

	float *confidences = faceDetectionResults.confidences.data();
--
2.7.4


From 041a814164e95cb1adbd18485c66ed3be2103dfc Mon Sep 17 00:00:00 2001
From: Inki Dae
Date: Thu, 14 Oct 2021 15:43:59 +0900
Subject: [PATCH 05/16] mv_machine_learning: code refactoring to OutputMetadata module

The OutputMetadata.h and .cpp files contain many classes and related
code, which makes them hard to maintain. The biggest change in this
refactoring is to split the classes bundled in the OutputMetadata files
into a separate file for each class. It also changes types that do not
need to be classes into structs, together with several cleanups such as
code sliding, renaming, and dropping unnecessary code.

Change-Id: I0ce677d333ce3a3e7212f7d26a20b6cf77bc7a9a
Signed-off-by: Inki Dae
---
 .../mv_inference/inference/include/BoxInfo.h       | 131 +++++++
 .../mv_inference/inference/include/DecodeInfo.h    | 156 ++++++++
 .../mv_inference/inference/include/DimInfo.h       |  52 +++
 .../mv_inference/inference/include/DispVec.h       |  93 +++++
 .../mv_inference/inference/include/Edge.h          |  68 ++++
 .../mv_inference/inference/include/InputMetadata.h |   3 -
 .../mv_inference/inference/include/Landmark.h      | 127 ++++++-
 .../mv_inference/inference/include/OffsetVec.h     |  84 +++++
 .../inference/include/OutputMetadata.h             | 329 +---------------
 .../mv_inference/inference/include/ScoreInfo.h     | 129 +++++++
 .../mv_inference/inference/include/Utils.h         |  46 +++
 .../mv_inference/inference/src/Inference.cpp       |   4 +-
 .../mv_inference/inference/src/InputMetadata.cpp   |  15 +-
 .../mv_inference/inference/src/OutputMetadata.cpp  | 419 ++-------------------
 packaging/capi-media-vision.spec                   |   2 +-
 15 files changed, 938 insertions(+), 720 deletions(-)
 create mode 100644 mv_machine_learning/mv_inference/inference/include/BoxInfo.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/DecodeInfo.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/DimInfo.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/DispVec.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/Edge.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/OffsetVec.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/ScoreInfo.h
 create mode 100644 mv_machine_learning/mv_inference/inference/include/Utils.h

diff --git a/mv_machine_learning/mv_inference/inference/include/BoxInfo.h b/mv_machine_learning/mv_inference/inference/include/BoxInfo.h
new file mode 100644
index 0000000..ceffaa0
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/include/BoxInfo.h
@@ -0,0 +1,131 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __BOX_INFO_H__ +#define __BOX_INFO_H__ + +#include +#include +#include +#include + +#include +#include + +namespace mediavision +{ +namespace inference +{ +namespace box +{ + class BoxInfo + { + private: + std::string name; + DimInfo dimInfo; + inference_box_type_e type; // 0:L-T-R-B, 1: Cx-Cy-W-H + std::vector order; // Order based on box type + inference_box_coordinate_type_e coordinate; // 0: ratio, 1: pixel + inference_box_decoding_type_e decodingType; // 0: bypass , 1:ssd with anchor + DecodeInfo decodingInfo; + + std::map supportedBoxTypes; + std::map supportedBoxCoordinateTypes; + std::map supportedBoxDecodingTypes; + + public: + BoxInfo() : + name(), + dimInfo(), + type(INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP), + order(), + coordinate(INFERENCE_BOX_COORDINATE_TYPE_RATIO), + decodingType(INFERENCE_BOX_DECODING_TYPE_BYPASS), + decodingInfo() + + { + supportedBoxTypes.insert({"ORIGIN_LEFTTOP", INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP}); + supportedBoxTypes.insert({"ORIGIN_CENTER", INFERENCE_BOX_TYPE_ORIGIN_CENTER}); + + supportedBoxCoordinateTypes.insert({"RATIO", INFERENCE_BOX_COORDINATE_TYPE_RATIO}); + supportedBoxCoordinateTypes.insert({"PIXEL", INFERENCE_BOX_COORDINATE_TYPE_PIXEL}); + + supportedBoxDecodingTypes.insert({"BYPASS", INFERENCE_BOX_DECODING_TYPE_BYPASS}); + supportedBoxDecodingTypes.insert({"SSD_ANCHOR", INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR}); + } + + ~BoxInfo() = default; + + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + inference_box_type_e GetType() { return type; } + inference_box_decoding_type_e GetDecodingType() { return decodingType; } + std::vector GetOrder() { return order; } + int GetCoordinate() { return coordinate; } + DecodeInfo& GetDecodeInfo() {return decodingInfo; } + + int ParseBox(JsonObject *root) + { + LOGI("ENTER"); + + JsonArray * rootArray = json_object_get_array_member(root, "box"); + unsigned int elements = json_array_get_length(rootArray); + + for (unsigned int elem_idx = 0; elem_idx < elements; ++elem_idx) { + JsonNode *pNode = json_array_get_element(rootArray, elem_idx); + JsonObject *pObject = json_node_get_object(pNode); + + name = json_object_get_string_member(pObject,"name"); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + + LOGI("range dim: size[%u]", elements2); + + for (unsigned int elem2_idx = 0; elem2_idx < elements2; ++elem2_idx) + if (static_cast(json_array_get_int_element(array, elem2_idx)) == 1) + dimInfo.SetValidIndex(elem2_idx); + + try { + type = GetSupportedType(pObject, "box_type", supportedBoxTypes); + coordinate = GetSupportedType(pObject, "box_coordinate", supportedBoxCoordinateTypes); + decodingType = GetSupportedType(pObject, "decoding_type", supportedBoxDecodingTypes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + array = json_object_get_array_member(pObject, "box_order"); + elements2 = json_array_get_length(array); + LOGI("box order should have 4 elements and it has [%u]", elements2); + + for (unsigned int elem2_idx = 0; elem2_idx < elements2; ++elem2_idx) { + auto val = static_cast(json_array_get_int_element(array, elem2_idx)); + order.push_back(val); + LOGI("%d", val); + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + }; +} /* box */ +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git 
a/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h b/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h new file mode 100644 index 0000000..a872c3a --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h @@ -0,0 +1,156 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DECODE_INFO_H__ +#define __DECODE_INFO_H__ + +#include +#include +#include +#include + +#include +#include + +namespace mediavision +{ +namespace inference +{ +namespace box +{ + struct AnchorParam { + int mode; /**< 0: generate anchor, 1:load pre-anchor*/ + int numLayers; + float minScale; + float maxScale; + int inputSizeHeight; + int inputSizeWidth; + float anchorOffsetX; + float anchorOffsetY; + std::vector strides; + std::vector aspectRatios; + bool isReduceBoxedInLowestLayer; + float interpolatedScaleAspectRatio; + bool isFixedAnchorSize; + bool isExponentialBoxScale; + float xScale; + float yScale; + float wScale; + float hScale; + }; + + struct NMSParam { + inference_box_nms_type_e mode; /**< 0: standard */ + float iouThreshold; + std::map supportedBoxNmsTypes; + }; + + struct RotateParam { + int startPointIndex; + int endPointIndex; + cv::Point2f startPoint; + cv::Point2f endPoint; + float baseAngle; + }; + + struct RoiOptionParam { + int startPointIndex; + int endPointIndex; + int centerPointIndex; + cv::Point2f centerPoint; + float shiftX; + float shiftY; + float scaleX; + float scaleY; + int mode; + }; + + class DecodeInfo { + private: + AnchorParam anchorParam; + std::vector anchorBoxes; + NMSParam nmsParam; + RotateParam rotParam; + RoiOptionParam roiOptParam; + + public: + DecodeInfo() { + nmsParam.mode = INFERENCE_BOX_NMS_TYPE_NONE; + nmsParam.iouThreshold = 0.2f; + nmsParam.supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD}); + + rotParam.startPointIndex = -1; + rotParam.endPointIndex = -1; + rotParam.startPoint = cv::Point2f(0.f,0.f); + rotParam.endPoint = cv::Point2f(0.f,0.f); + rotParam.baseAngle = 0.f; + + roiOptParam.startPointIndex = -1; + roiOptParam.endPointIndex = -1; + roiOptParam.centerPointIndex = -1; + roiOptParam.centerPoint = cv::Point2f(0.f, 0.f); + roiOptParam.shiftX = 0.f; + roiOptParam.shiftY = 0.f; + roiOptParam.scaleX = 1.f; + roiOptParam.scaleY = 1.f; + roiOptParam.mode = -1; + } + + ~DecodeInfo() = default; + + std::vector& GetAnchorBoxAll(); + bool IsAnchorBoxEmpty(); + void AddAnchorBox(cv::Rect2f& ahcnor); + void ClearAnchorBox(); + + // Anchor param + int ParseAnchorParam(JsonObject *root); + int GenerateAnchor(); + bool IsFixedAnchorSize(); + bool IsExponentialBoxScale(); + float GetAnchorXscale(); + float GetAnchorYscale(); + float GetAnchorWscale(); + float GetAnchorHscale(); + float CalculateScale(float min, float max, int index, int maxStride); + + // Nms param + int ParseNms(JsonObject *root); + int GetNmsMode(); + float GetNmsIouThreshold(); + + // Rotate param + int ParseRotate(JsonObject 
*root); + int GetRotStartPointIndex(); + int GetRotEndPointIndex(); + float GetBaseAngle(); + + // Roi option param + int ParseRoiOption(JsonObject *root); + int GetRoiMode(); + int GetRoiCenterPointIndex(); + int GetRoiStartPointIndex(); + int GetRoiEndPointIndex(); + float GetShiftX(); + float GetShiftY(); + float GetScaleX(); + float GetScaleY(); + }; +} /* box */ +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/DimInfo.h b/mv_machine_learning/mv_inference/inference/include/DimInfo.h new file mode 100644 index 0000000..d061122 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/DimInfo.h @@ -0,0 +1,52 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DIM_INFO_H__ +#define __DIM_INFO_H__ + +#include + +namespace mediavision +{ +namespace inference +{ + class DimInfo + { + private: + std::vector dims; + + public: + std::vector GetValidIndexAll() const + { + LOGI("ENTER"); + + LOGI("LEAVE"); + return dims; + } + + void SetValidIndex(int index) + { + LOGI("ENTER"); + + dims.push_back(index); + + LOGI("LEAVE"); + } + }; +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/DispVec.h b/mv_machine_learning/mv_inference/inference/include/DispVec.h new file mode 100644 index 0000000..f43dcf5 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/DispVec.h @@ -0,0 +1,93 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __DISP_VEC_H__ +#define __DISP_VEC_H__ + +#include +#include +#include +#include + +#include +#include "DimInfo.h" +#include "Utils.h" + +namespace mediavision +{ +namespace inference +{ + class DispVec + { + private: + std::string name; + DimInfo dimInfo; + inference_displacement_type_e type; + int shapeType; + std::map supportedDispTypes; + + public: + DispVec() : + name(), + dimInfo(), + type(INFERENCE_DISPLACEMENT_TYPE_FORWARD), + shapeType(INFERENCE_TENSOR_SHAPE_NCHW) + { + supportedDispTypes.insert({"FORWARD", INFERENCE_DISPLACEMENT_TYPE_FORWARD}); + supportedDispTypes.insert({"BACKWARD", INFERENCE_DISPLACEMENT_TYPE_BACKWARD}); + } + + ~DispVec() = default; + + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + inference_displacement_type_e GetType() { return type; } + int GetShapeType() { return shapeType; } + + int ParseDisplacement(JsonObject *root, const std::map& supportedShapeType) + { + LOGI("ENTER"); + + name = static_cast(json_object_get_string_member(root,"name")); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(root, "index"); + unsigned int elements2 = json_array_get_length(array); + + LOGI("range dim: size[%u]", elements2); + + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if(static_cast(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } + + try { + shapeType = GetSupportedType(root, "shape_type", supportedShapeType); + type = GetSupportedType(root, "type", supportedDispTypes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + }; +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/Edge.h b/mv_machine_learning/mv_inference/inference/include/Edge.h new file mode 100644 index 0000000..80c0216 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/Edge.h @@ -0,0 +1,68 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __EDGE_H__ +#define __EDGE_H__ + +#include +#include +#include + +namespace mediavision +{ +namespace inference +{ + class Edge + { + private: + std::vector> edges; + + public: + Edge() = default; + + ~Edge() = default; + + int ParseEdge(JsonObject *root) + { + LOGI("ENTER"); + + JsonArray * rootArray = json_object_get_array_member(root, "edgemap"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + int pEdgeNode, cEdgeNode; + + for (unsigned int elem = 0; elem < elements; ++elem) { + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + pEdgeNode = json_object_get_int_member(pObject, "parent"); + cEdgeNode = json_object_get_int_member(pObject, "child"); + + edges.push_back(std::make_pair(pEdgeNode, cEdgeNode)); + LOGI("%ud: parent - child: %d - %d", elem, pEdgeNode, cEdgeNode); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + std::vector>& GetEdgesAll() { return edges; } + }; +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/InputMetadata.h b/mv_machine_learning/mv_inference/inference/include/InputMetadata.h index bdd12c2..c2bf94d 100644 --- a/mv_machine_learning/mv_inference/inference/include/InputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/InputMetadata.h @@ -121,9 +121,6 @@ namespace inference std::map layer; std::map option; - template - static T GetSupportedType(JsonObject* root, std::string typeName, - std::map& supportedTypes); int GetTensorInfo(JsonObject* root); int GetPreProcess(JsonObject* root); diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h index 63ccf60..2fe6c9a 100644 --- a/mv_machine_learning/mv_inference/inference/include/Landmark.h +++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __MEDIA_VISION_LANDMARK_H__ -#define __MEDIA_VISION_LANDMARK_H__ +#ifndef __LANDMARK_H__ +#define __LANDMARK_H__ #include #include @@ -47,7 +47,128 @@ namespace inference float score; } LandmarkResults; + typedef struct _HeatMapInfo { + int wIdx; + int hIdx; + int cIdx; + float nmsRadius; + inference_tensor_shape_type_e shapeType; + } HeatMapInfo; + + class Landmark + { + private: + std::string name; + DimInfo dimInfo; + inference_landmark_type_e type; /**< 0: 2D_SINGLE, 1: 2D_MULTI, 2: 3D_SINGLE */ + int offset; + inference_landmark_coorindate_type_e coordinate; /**< 0: RATIO, 1: PIXEL */ + inference_landmark_decoding_type_e decodingType; /**< 0: decoding unnecessary, + 1: decoding heatmap, + 2: decoding heatmap with refinement */ + HeatMapInfo heatMapInfo; + + std::map supportedLandmarkTypes; + std::map supportedLandmarkCoordinateTypes; + std::map supportedLandmarkDecodingTypes; + + public: + + Landmark() : + name(), + dimInfo(), + type(INFERENCE_LANDMARK_TYPE_2D_SINGLE), + offset(), + coordinate(INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO), + decodingType(INFERENCE_LANDMARK_DECODING_TYPE_BYPASS), + heatMapInfo() + + { + supportedLandmarkTypes.insert({"2D_SINGLE", INFERENCE_LANDMARK_TYPE_2D_SINGLE}); + supportedLandmarkTypes.insert({"2D_MULTI", INFERENCE_LANDMARK_TYPE_2D_MULTI}); + supportedLandmarkTypes.insert({"3D_SINGLE", INFERENCE_LANDMARK_TYPE_3D_SINGLE}); + + supportedLandmarkCoordinateTypes.insert({"RATIO", INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO}); + supportedLandmarkCoordinateTypes.insert({"PIXEL", INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL}); + + supportedLandmarkDecodingTypes.insert({"BYPASS", INFERENCE_LANDMARK_DECODING_TYPE_BYPASS}); + supportedLandmarkDecodingTypes.insert({"HEATMAP", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP}); + supportedLandmarkDecodingTypes.insert({"HEATMAP_REFINE", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE}); + } + + ~Landmark() = default; + + int ParseLandmark(JsonObject *root) + { + // box + JsonArray * rootArray = json_object_get_array_member(root, "landmark"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + name = + static_cast(json_object_get_string_member(pObject,"name")); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } + + try { + type = GetSupportedType(pObject, "landmark_type", supportedLandmarkTypes); + coordinate = GetSupportedType(pObject, "landmark_coordinate", supportedLandmarkCoordinateTypes); + decodingType = GetSupportedType(pObject, "decoding_type", supportedLandmarkDecodingTypes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + offset = static_cast(json_object_get_int_member(pObject, "landmark_offset")); + LOGI("landmark offset: %d", offset); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + inference_landmark_type_e GetType() + { + return type; + } + + int GetOffset() + { + return offset; + } + + inference_landmark_coorindate_type_e GetCoordinate() + { + return coordinate; + } + + 
inference_landmark_decoding_type_e GetDecodingType() + { + return decodingType; + } + + HeatMapInfo& GetHeatMapInfo() + { + return heatMapInfo; + } + + std::string GetName() { return name; } + + DimInfo GetDimInfo() { return dimInfo; } + }; } /* Inference */ } /* MediaVision */ -#endif /* __MEDIA_VISION_LANDMARK_H__ */ +#endif /* __LANDMARK_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/OffsetVec.h b/mv_machine_learning/mv_inference/inference/include/OffsetVec.h new file mode 100644 index 0000000..c5fe30b --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/OffsetVec.h @@ -0,0 +1,84 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OFFSET_VEC_H__ +#define __OFFSET_VEC_H__ + +#include +#include +#include + +#include +#include "DimInfo.h" +#include "Utils.h" + +namespace mediavision +{ +namespace inference +{ + class OffsetVec + { + private: + std::string name; + DimInfo dimInfo; + int shapeType; + public: + OffsetVec() : name(), dimInfo(), shapeType() { } + ~OffsetVec() = default; + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + int GetShapeType() { return shapeType; } + + int ParseOffset(JsonObject *root, const std::map& supportedShapeType) + { + JsonArray * rootArray = json_object_get_array_member(root, "offset"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + name = + static_cast(json_object_get_string_member(pObject,"name")); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } + + try { + shapeType = GetSupportedType(pObject, "shape_type", supportedShapeType); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + }; +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index d223726..6724526 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -27,6 +27,14 @@ #include #include #include "OutputMetadataTypes.h" +#include "DecodeInfo.h" +#include "Edge.h" +#include "DispVec.h" +#include "DimInfo.h" +#include "OffsetVec.h" +#include "Landmark.h" +#include "BoxInfo.h" +#include 
"ScoreInfo.h" /** * @file OutputMetadata.h @@ -38,316 +46,16 @@ namespace mediavision { namespace inference { - class DimInfo + struct Label { - private: - std::vector dims; - - public: - std::vector GetValidIndexAll() const; - void SetValidIndex(int index); - }; - - class DeQuantization - { - private: - double scale; - double zeropoint; - - public: - DeQuantization(double s, double z) : scale(s), zeropoint(z) {}; - ~DeQuantization() = default; - - double GetScale() { return scale; } - double GetZeroPoint() { return zeropoint; } - }; - - class ScoreInfo - { - private: - std::string name; - DimInfo dimInfo; - double threshold; - int topNumber; - inference_score_type_e type; - std::shared_ptr deQuantization; - std::map supportedScoreTypes; - - public: - ScoreInfo(); - ~ScoreInfo() = default; - - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - double GetThresHold() { return threshold; } - inference_score_type_e GetType() { return type; } - int GetTopNumber() { return topNumber; } - std::shared_ptr GetDeQuant() { return deQuantization; } - int ParseScore(JsonObject *root); - }; - - struct AnchorParam { - int mode; /**< 0: generate anchor, 1:load pre-anchor*/ - int numLayers; - float minScale; - float maxScale; - int inputSizeHeight; - int inputSizeWidth; - float anchorOffsetX; - float anchorOffsetY; - std::vector strides; - std::vector aspectRatios; - bool isReduceBoxedInLowestLayer; - float interpolatedScaleAspectRatio; - bool isFixedAnchorSize; - bool isExponentialBoxScale; - float xScale; - float yScale; - float wScale; - float hScale; - }; - - struct NMSParam { - inference_box_nms_type_e mode; /**< 0: standard */ - float iouThreshold; - std::map supportedBoxNmsTypes; - }; - - struct RotateParam { - int startPointIndex; - int endPointIndex; - cv::Point2f startPoint; - cv::Point2f endPoint; - float baseAngle; - }; - - struct RoiOptionParam { - int startPointIndex; - int endPointIndex; - int centerPointIndex; - cv::Point2f centerPoint; - float shiftX; - float shiftY; - float scaleX; - float scaleY; - int mode; - }; - - class DecodeInfo { - private: - AnchorParam anchorParam; - std::vector anchorBoxes; - NMSParam nmsParam; - RotateParam rotParam; - RoiOptionParam roiOptParam; - - public: - DecodeInfo() { - nmsParam.mode = INFERENCE_BOX_NMS_TYPE_NONE; - nmsParam.iouThreshold = 0.2f; - nmsParam.supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD}); - - rotParam.startPointIndex = -1; - rotParam.endPointIndex = -1; - rotParam.startPoint = cv::Point2f(0.f,0.f); - rotParam.endPoint = cv::Point2f(0.f,0.f); - rotParam.baseAngle = 0.f; - - roiOptParam.startPointIndex = -1; - roiOptParam.endPointIndex = -1; - roiOptParam.centerPointIndex = -1; - roiOptParam.centerPoint = cv::Point2f(0.f, 0.f); - roiOptParam.shiftX = 0.f; - roiOptParam.shiftY = 0.f; - roiOptParam.scaleX = 1.f; - roiOptParam.scaleY = 1.f; - roiOptParam.mode = -1; - } - - ~DecodeInfo() = default; - - std::vector& GetAnchorBoxAll(); - bool IsAnchorBoxEmpty(); - void AddAnchorBox(cv::Rect2f& ahcnor); - void ClearAnchorBox(); - - // Anchor param - int ParseAnchorParam(JsonObject *root); - int GenerateAnchor(); - bool IsFixedAnchorSize(); - bool IsExponentialBoxScale(); - float GetAnchorXscale(); - float GetAnchorYscale(); - float GetAnchorWscale(); - float GetAnchorHscale(); - float CalculateScale(float min, float max, int index, int maxStride); - - // Nms param - int ParseNms(JsonObject *root); - int GetNmsMode(); - float GetNmsIouThreshold(); - - // Rotate param - int 
ParseRotate(JsonObject *root); - int GetRotStartPointIndex(); - int GetRotEndPointIndex(); - float GetBaseAngle(); - - // Roi option param - int ParseRoiOption(JsonObject *root); - int GetRoiMode(); - int GetRoiCenterPointIndex(); - int GetRoiStartPointIndex(); - int GetRoiEndPointIndex(); - float GetShiftX(); - float GetShiftY(); - float GetScaleX(); - float GetScaleY(); - }; - - class BoxInfo - { - private: - std::string name; - DimInfo dimInfo; - inference_box_type_e type; // 0:L-T-R-B, 1: Cx-Cy-W-H - std::vector order; // Order based on box type - inference_box_coordinate_type_e coordinate; // 0: ratio, 1: pixel - inference_box_decoding_type_e decodingType; // 0: bypass , 1:ssd with anchor - DecodeInfo decodingInfo; - - std::map supportedBoxTypes; - std::map supportedBoxCoordinateTypes; - std::map supportedBoxDecodingTypes; - - public: - BoxInfo(); - ~BoxInfo() = default; - - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - inference_box_type_e GetType() { return type; } - inference_box_decoding_type_e GetDecodingType() { return decodingType; } - std::vector GetOrder() { return order; } - int GetCoordinate() { return coordinate; } - DecodeInfo& GetDecodeInfo() {return decodingInfo; } - - int ParseBox(JsonObject *root); - }; - - class Label - { - private: - std::string name; - DimInfo dimInfo; - - public: - Label() = default; - ~Label() = default; - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - - int ParseLabel(JsonObject *root); - }; - - class Number - { - private: std::string name; DimInfo dimInfo; - - public: - Number() = default; - ~Number() = default; - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - - int ParseNumber(JsonObject *root); }; - struct HeatMapInfo { - int wIdx; - int hIdx; - int cIdx; - float nmsRadius; - inference_tensor_shape_type_e shapeType; - }; - - class Landmark + struct Number { - private: std::string name; DimInfo dimInfo; - inference_landmark_type_e type; /**< 0: 2D_SINGLE, 1: 2D_MULTI, 2: 3D_SINGLE */ - int offset; - inference_landmark_coorindate_type_e coordinate; /**< 0: RATIO, 1: PIXEL */ - inference_landmark_decoding_type_e decodingType; /**< 0: decoding unnecessary, - 1: decoding heatmap, - 2: decoding heatmap with refinement */ - HeatMapInfo heatMapInfo; - - std::map supportedLandmarkTypes; - std::map supportedLandmarkCoordinateTypes; - std::map supportedLandmarkDecodingTypes; - - public: - Landmark(); - ~Landmark() = default; - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - inference_landmark_type_e GetType(); - int GetOffset(); - inference_landmark_coorindate_type_e GetCoordinate(); - inference_landmark_decoding_type_e GetDecodingType(); - HeatMapInfo& GetHeatMapInfo(); - - int ParseLandmark(JsonObject *root); - }; - - class OffsetVec - { - private: - std::string name; - DimInfo dimInfo; - int shapeType; - public: - OffsetVec() = default; - ~OffsetVec() = default; - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - int GetShapeType() { return shapeType; } - - int ParseOffset(JsonObject *root); - }; - - class DispVec - { - private: - std::string name; - DimInfo dimInfo; - inference_displacement_type_e type; - int shapeType; - std::map supportedDispTypes; - public: - DispVec(); - ~DispVec() = default; - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - inference_displacement_type_e GetType() { return type; } - int GetShapeType() { return 
shapeType; } - - int ParseDisplacement(JsonObject *root); - }; - - class Edge - { - private: - std::vector> edges; - public: - Edge() = default; - ~Edge() = default; - int ParseEdge(JsonObject *root); - std::vector>& GetEdgesAll() { return edges; } }; class OutputMetadata @@ -355,13 +63,14 @@ namespace inference private: bool parsed; ScoreInfo score; - BoxInfo box; + box::BoxInfo box; Label label; Number number; Landmark landmark; OffsetVec offsetVec; std::vector dispVecs; Edge edgeMap; + std::map mSupportedShapeType; int ParseScore(JsonObject *root); int ParseBox(JsonObject *root); @@ -375,7 +84,6 @@ namespace inference int ParseEdgeMap(JsonObject * root); public: - static std::map supportedTensorShapes; /** * @brief Creates an OutputMetadata class instance. * @@ -405,15 +113,17 @@ namespace inference double GetScoreThreshold() { return score.GetThresHold(); } int GetScoreTopNumber() { return score.GetTopNumber(); } std::shared_ptr GetScoreDeQuant() { return score.GetDeQuant(); } + double GetScoreDeQuantScale() { return score.GetDeQuantScale(); } + double GetScoreDeQuantZeroPoint() { return score.GetDeQuantZeroPoint(); } std::string GetBoxName() { return box.GetName(); } DimInfo GetBoxDimInfo() { return box.GetDimInfo(); } std::vector GetBoxOrder() { return box.GetOrder(); } - DecodeInfo& GetBoxDecodeInfo() { return box.GetDecodeInfo(); } + box::DecodeInfo& GetBoxDecodeInfo() { return box.GetDecodeInfo(); } inference_box_type_e GetBoxType() { return box.GetType(); } int GetScoreCoordinate() { return box.GetCoordinate(); } - std::string GetLabelName() { return label.GetName(); } - std::string GetNumberName() { return number.GetName(); } - DimInfo GetNumberDimInfo() { return number.GetDimInfo(); } + std::string GetLabelName() { return label.name; } + std::string GetNumberName() { return number.name; } + DimInfo GetNumberDimInfo() { return number.dimInfo; } std::string GetLandmarkName() { return landmark.GetName(); } int GetLandmarkOffset() { return landmark.GetOffset(); } inference_landmark_type_e GetLandmarkType() { return landmark.GetType(); } @@ -425,9 +135,6 @@ namespace inference inference_box_decoding_type_e GetBoxDecodingType() { return box.GetDecodingType(); } std::vector& GetDispVecAll() { return dispVecs; } std::vector>& GetEdges() { return edgeMap.GetEdgesAll(); } - template - static T GetSupportedType(JsonObject* root, std::string typeName, - std::map& supportedTypes); }; } /* Inference */ } /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h b/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h new file mode 100644 index 0000000..24180d7 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h @@ -0,0 +1,129 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __SCORE_INFO_H__ +#define __SCORE_INFO_H__ + +#include +#include +#include +#include + +#include +#include "DimInfo.h" + +namespace mediavision +{ +namespace inference +{ + struct DeQuantization + { + double scale; + double zeropoint; + + DeQuantization(double s, double z) : scale(s), zeropoint(z) { } + }; + + class ScoreInfo + { + private: + std::string name; + DimInfo dimInfo; + double threshold; + int topNumber; + inference_score_type_e type; + std::shared_ptr deQuantization; + std::map supportedScoreTypes; + + public: + ScoreInfo() : + name(), + dimInfo(), + threshold(0.0), + topNumber(1), + type(INFERENCE_SCORE_TYPE_NORMAL), + deQuantization(nullptr) + { + // Score type + supportedScoreTypes.insert({"NORMAL", INFERENCE_SCORE_TYPE_NORMAL}); + supportedScoreTypes.insert({"SIGMOID", INFERENCE_SCORE_TYPE_SIGMOID}); + } + + ~ScoreInfo() = default; + + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + double GetThresHold() { return threshold; } + inference_score_type_e GetType() { return type; } + int GetTopNumber() { return topNumber; } + std::shared_ptr GetDeQuant() { return deQuantization; } + double GetDeQuantScale() { return deQuantization->scale; } + double GetDeQuantZeroPoint() { return deQuantization->zeropoint; } + + int ParseScore(JsonObject *root) + { + LOGI("ENTER"); + + JsonArray * rootArray = json_object_get_array_member(root, "score"); + unsigned int elements = json_array_get_length(rootArray); + + for (unsigned int elem = 0; elem < elements; ++elem) { + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + name = json_object_get_string_member(pObject,"name"); + LOGI("layer: %s", name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } + + topNumber = static_cast(json_object_get_int_member(pObject, "top_number")); + LOGI("top number: %d", topNumber); + + threshold = static_cast(json_object_get_double_member(pObject, "threshold")); + LOGI("threshold: %1.3f", threshold); + + try { + type = GetSupportedType(pObject, "score_type", supportedScoreTypes); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + if (json_object_has_member(pObject, "dequantization")) { + array = json_object_get_array_member(pObject, "dequantization"); + JsonNode *node = json_array_get_element(array, 0); + JsonObject *object = json_node_get_object(node); + + deQuantization = std::make_shared( + json_object_get_double_member(object, "scale"), + json_object_get_double_member(object, "zeropoint")); + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + }; +} /* Inference */ +} /* MediaVision */ + +#endif \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/Utils.h b/mv_machine_learning/mv_inference/inference/include/Utils.h new file mode 100644 index 0000000..c8a37cd --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/Utils.h @@ -0,0 +1,46 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTILS_H__
+#define __UTILS_H__
+
+#include
+#include
+#include
+#include
+
+#include
+
+namespace mediavision
+{
+namespace inference
+{
+	template <typename T>
+	T GetSupportedType(JsonObject* root, std::string typeName, const std::map<std::string, T>& supportedTypes)
+	{
+		auto supportedType = supportedTypes.find(json_object_get_string_member(root, typeName.c_str()));
+		if (supportedType == supportedTypes.end()) {
+			throw std::invalid_argument(typeName);
+		}
+
+		LOGI("%s: %d:%s", typeName.c_str(), supportedType->second, supportedType->first.c_str());
+
+		return supportedType->second;
+	}
+} /* Inference */
+} /* MediaVision */
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp
index d79b3ff..bf9a102 100755
--- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp
@@ -1130,8 +1130,8 @@ namespace inference
 			if (outputMetadata.GetScoreDeQuant()) {
 				value = PostProcess::dequant(value,
-						outputMetadata.GetScoreDeQuant()->GetScale(),
-						outputMetadata.GetScoreDeQuant()->GetZeroPoint());
+						outputMetadata.GetScoreDeQuantScale(),
+						outputMetadata.GetScoreDeQuantZeroPoint());
 			}
 
 			if (outputMetadata.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID)
diff --git a/mv_machine_learning/mv_inference/inference/src/InputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/InputMetadata.cpp
index 66d257c..d6bc290 100644
--- a/mv_machine_learning/mv_inference/inference/src/InputMetadata.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/InputMetadata.cpp
@@ -23,6 +23,7 @@
 #include
 #include "InputMetadata.h"
 #include
+#include "Utils.h"
 
 namespace mediavision
 {
@@ -46,20 +47,6 @@ namespace inference
 		mSupportedColorSpace.insert({"GRAY8", MEDIA_VISION_COLORSPACE_Y800});
 	}
 
-	template <typename T>
-	T InputMetadata::GetSupportedType(JsonObject* root, std::string typeName,
-						std::map<std::string, T>& supportedTypes)
-	{
-		auto supportedType = supportedTypes.find(json_object_get_string_member(root, typeName.c_str()));
-		if (supportedType == supportedTypes.end()) {
-			throw std::invalid_argument(typeName);
-		}
-
-		LOGI("%s: %d:%s", typeName.c_str(), supportedType->second, supportedType->first.c_str());
-
-		return supportedType->second;
-	}
-
 	int InputMetadata::GetTensorInfo(JsonObject *root)
 	{
 		LOGI("ENTER");
diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
index 176b0eb..8a1362a 100755
--- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
+++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
@@ -21,15 +21,16 @@
 #include
 #include
 #include
+
 #include "OutputMetadata.h"
+#include "Utils.h"
+
+using namespace mediavision::inference::box;
 
 namespace mediavision
 {
 namespace inference
 {
-	std::map<std::string, inference_tensor_shape_type_e> OutputMetadata::supportedTensorShapes =
-		{{"NCHW", INFERENCE_TENSOR_SHAPE_NCHW}, {"NHWC", INFERENCE_TENSOR_SHAPE_NHWC}};
-
 	OutputMetadata::OutputMetadata() :
 			parsed(false),
 			score(),
@@ -41,84 +42,9 @@ namespace
inference dispVecs(), edgeMap() { - - } - - ScoreInfo::ScoreInfo() : - name(), - dimInfo(), - threshold(0.0), - topNumber(1), - type(INFERENCE_SCORE_TYPE_NORMAL), - deQuantization(nullptr) - { - // Score type - supportedScoreTypes.insert({"NORMAL", INFERENCE_SCORE_TYPE_NORMAL}); - supportedScoreTypes.insert({"SIGMOID", INFERENCE_SCORE_TYPE_SIGMOID}); - } - - template - T OutputMetadata::GetSupportedType(JsonObject* root, std::string typeName, - std::map& supportedTypes) - { - auto supportedType = supportedTypes.find(json_object_get_string_member(root, typeName.c_str())); - if (supportedType == supportedTypes.end()) { - throw std::invalid_argument(typeName); - } - - LOGI("%s: %d:%s", typeName.c_str(), supportedType->second, supportedType->first.c_str()); - - return supportedType->second; - } - - int ScoreInfo::ParseScore(JsonObject *root) - { - LOGI("ENTER"); - - JsonArray * rootArray = json_object_get_array_member(root, "score"); - unsigned int elements = json_array_get_length(rootArray); - - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - - topNumber = static_cast(json_object_get_int_member(pObject, "top_number")); - LOGI("top number: %d", topNumber); - - threshold = static_cast(json_object_get_double_member(pObject, "threshold")); - LOGI("threshold: %1.3f", threshold); - - try { - type = OutputMetadata::GetSupportedType(pObject, "score_type", supportedScoreTypes); - } catch (const std::exception& e) { - LOGE("Invalid %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - if (json_object_has_member(pObject, "dequantization")) { - array = json_object_get_array_member(pObject, "dequantization"); - JsonNode *node = json_array_get_element(array, 0); - JsonObject *object = json_node_get_object(node); - - deQuantization = std::make_shared( - json_object_get_double_member(object, "scale"), - json_object_get_double_member(object, "zeropoint")); - } - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; + // shape_type + mSupportedShapeType.insert({"NCHW", INFERENCE_TENSOR_SHAPE_NCHW}); + mSupportedShapeType.insert({"NHWC", INFERENCE_TENSOR_SHAPE_NHWC}); } int OutputMetadata::ParseScore(JsonObject *root) @@ -131,71 +57,6 @@ namespace inference return score.ParseScore(root); } - BoxInfo::BoxInfo() : - name(), - dimInfo(), - type(INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP), - order(), - coordinate(INFERENCE_BOX_COORDINATE_TYPE_RATIO), - decodingType(INFERENCE_BOX_DECODING_TYPE_BYPASS), - decodingInfo() - - { - supportedBoxTypes.insert({"ORIGIN_LEFTTOP", INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP}); - supportedBoxTypes.insert({"ORIGIN_CENTER", INFERENCE_BOX_TYPE_ORIGIN_CENTER}); - - supportedBoxCoordinateTypes.insert({"RATIO", INFERENCE_BOX_COORDINATE_TYPE_RATIO}); - supportedBoxCoordinateTypes.insert({"PIXEL", INFERENCE_BOX_COORDINATE_TYPE_PIXEL}); - - supportedBoxDecodingTypes.insert({"BYPASS", INFERENCE_BOX_DECODING_TYPE_BYPASS}); - supportedBoxDecodingTypes.insert({"SSD_ANCHOR", INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR}); - } - - int 
BoxInfo::ParseBox(JsonObject *root) - { - LOGI("ENTER"); - - JsonArray * rootArray = json_object_get_array_member(root, "box"); - unsigned int elements = json_array_get_length(rootArray); - - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - - try { - type = OutputMetadata::GetSupportedType(pObject, "box_type", supportedBoxTypes); - coordinate = OutputMetadata::GetSupportedType(pObject, "box_coordinate", supportedBoxCoordinateTypes); - decodingType = OutputMetadata::GetSupportedType(pObject, "decoding_type", supportedBoxDecodingTypes); - } catch (const std::exception& e) { - LOGE("Invalid %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - array = json_object_get_array_member(pObject, "box_order"); - elements2 = json_array_get_length(array); - LOGI("box order should have 4 elements and it has [%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - auto val = static_cast(json_array_get_int_element(array, elem2)); - order.push_back(val); - LOGI("%d", val); - } - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - void DecodeInfo::AddAnchorBox(cv::Rect2f& anchor) { anchorBoxes.push_back(anchor); @@ -226,10 +87,16 @@ namespace inference return box.ParseBox(root); } - int Label::ParseLabel(JsonObject *root) + int OutputMetadata::ParseLabel(JsonObject *root) { LOGI("ENTER"); + if (!json_object_has_member(root, "label")) { + LOGE("No box outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + JsonArray * rootArray = json_object_get_array_member(root, "label"); unsigned int elements = json_array_get_length(rootArray); @@ -238,40 +105,32 @@ namespace inference JsonNode *pNode = json_array_get_element(rootArray, elem); JsonObject *pObject = json_node_get_object(pNode); - name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", name.c_str()); + label.name = json_object_get_string_member(pObject,"name"); + LOGI("layer: %s", label.name.c_str()); JsonArray * array = json_object_get_array_member(pObject, "index"); unsigned int elements2 = json_array_get_length(array); LOGI("range dim: size[%u]", elements2); for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); + label.dimInfo.SetValidIndex(elem2); } } - LOGI("LEAVEL"); + LOGI("LEAVE"); return MEDIA_VISION_ERROR_NONE; } - int OutputMetadata::ParseLabel(JsonObject *root) + int OutputMetadata::ParseNumber(JsonObject *root) { LOGI("ENTER"); - if (!json_object_has_member(root, "label")) { - LOGE("No box outputmetadata"); + if (!json_object_has_member(root, "number")) { + LOGE("No number outputmetadata"); LOGI("LEAVE"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - label.ParseLabel(root); - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int Number::ParseNumber(JsonObject *root) - { // box JsonArray * rootArray = json_object_get_array_member(root, "number"); unsigned int elements = 
json_array_get_length(rootArray); @@ -281,31 +140,19 @@ namespace inference JsonNode *pNode = json_array_get_element(rootArray, elem); JsonObject *pObject = json_node_get_object(pNode); - name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", name.c_str()); + number.name = json_object_get_string_member(pObject,"name"); + + LOGI("layer: %s", number.name.c_str()); JsonArray * array = json_object_get_array_member(pObject, "index"); unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - } - return MEDIA_VISION_ERROR_NONE; - } - - int OutputMetadata::ParseNumber(JsonObject *root) - { - LOGI("ENTER"); + LOGI("range dim: size[%u]", elements2); - if (!json_object_has_member(root, "number")) { - LOGE("No number outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + number.dimInfo.SetValidIndex(elem2); } - number.ParseNumber(root); LOGI("LEAVE"); return MEDIA_VISION_ERROR_NONE; @@ -560,7 +407,7 @@ namespace inference JsonObject *object = json_object_get_object_member(root, "nms"); try { - this->nmsParam.mode = OutputMetadata::GetSupportedType(object, "mode", this->nmsParam.supportedBoxNmsTypes); + this->nmsParam.mode = GetSupportedType(object, "mode", this->nmsParam.supportedBoxNmsTypes); } catch (const std::exception& e) { LOGE("Invalid %s", e.what()); return MEDIA_VISION_ERROR_INVALID_OPERATION; @@ -671,94 +518,6 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - Landmark::Landmark() : - name(), - dimInfo(), - type(INFERENCE_LANDMARK_TYPE_2D_SINGLE), - offset(), - coordinate(INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO), - decodingType(INFERENCE_LANDMARK_DECODING_TYPE_BYPASS), - heatMapInfo() - - { - supportedLandmarkTypes.insert({"2D_SINGLE", INFERENCE_LANDMARK_TYPE_2D_SINGLE}); - supportedLandmarkTypes.insert({"2D_MULTI", INFERENCE_LANDMARK_TYPE_2D_MULTI}); - supportedLandmarkTypes.insert({"3D_SINGLE", INFERENCE_LANDMARK_TYPE_3D_SINGLE}); - - supportedLandmarkCoordinateTypes.insert({"RATIO", INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO}); - supportedLandmarkCoordinateTypes.insert({"PIXEL", INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL}); - - supportedLandmarkDecodingTypes.insert({"BYPASS", INFERENCE_LANDMARK_DECODING_TYPE_BYPASS}); - supportedLandmarkDecodingTypes.insert({"HEATMAP", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP}); - supportedLandmarkDecodingTypes.insert({"HEATMAP_REFINE", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE}); - } - - int Landmark::ParseLandmark(JsonObject *root) - { - // box - JsonArray * rootArray = json_object_get_array_member(root, "landmark"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - name = - static_cast(json_object_get_string_member(pObject,"name")); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - 
dimInfo.SetValidIndex(elem2); - } - - try { - type = OutputMetadata::GetSupportedType(pObject, "landmark_type", supportedLandmarkTypes); - coordinate = OutputMetadata::GetSupportedType(pObject, "landmark_coordinate", supportedLandmarkCoordinateTypes); - decodingType = OutputMetadata::GetSupportedType(pObject, "decoding_type", supportedLandmarkDecodingTypes); - } catch (const std::exception& e) { - LOGE("Invalid %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - offset = static_cast(json_object_get_int_member(pObject, "landmark_offset")); - LOGI("landmark offset: %d", offset); - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - inference_landmark_type_e Landmark::GetType() - { - return type; - } - - int Landmark::GetOffset() - { - return offset; - } - - inference_landmark_coorindate_type_e Landmark::GetCoordinate() - { - return coordinate; - } - - inference_landmark_decoding_type_e Landmark::GetDecodingType() - { - return decodingType; - } - - HeatMapInfo& Landmark::GetHeatMapInfo() - { - return heatMapInfo; - } - int OutputMetadata::ParseLandmark(JsonObject *root) { LOGI("ENTER"); @@ -812,7 +571,7 @@ namespace inference JsonObject *object = json_object_get_object_member(cObject, "heatmap") ; try { - landmark.GetHeatMapInfo().shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes); + landmark.GetHeatMapInfo().shapeType = GetSupportedType(object, "shape_type", mSupportedShapeType); } catch (const std::exception& e) { LOGE("Invalid %s", e.what()); return MEDIA_VISION_ERROR_INVALID_OPERATION; @@ -839,42 +598,6 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int OffsetVec::ParseOffset(JsonObject *root) - { - JsonArray * rootArray = json_object_get_array_member(root, "offset"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - name = - static_cast(json_object_get_string_member(pObject,"name")); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - - try { - shapeType = OutputMetadata::GetSupportedType(pObject, "shape_type", OutputMetadata::supportedTensorShapes); - } catch (const std::exception& e) { - LOGE("Invalid %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - int OutputMetadata::ParseOffset(JsonObject *root) { LOGI("ENTER"); @@ -885,44 +608,7 @@ namespace inference return MEDIA_VISION_ERROR_INVALID_OPERATION; } - offsetVec.ParseOffset(root); - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - DispVec::DispVec() : - name(), - dimInfo(), - type(INFERENCE_DISPLACEMENT_TYPE_FORWARD), - shapeType(INFERENCE_TENSOR_SHAPE_NCHW) - { - supportedDispTypes.insert({"FORWARD", INFERENCE_DISPLACEMENT_TYPE_FORWARD}); - supportedDispTypes.insert({"BACKWARD", INFERENCE_DISPLACEMENT_TYPE_BACKWARD}); - } - - int DispVec::ParseDisplacement(JsonObject *root) - { - LOGI("ENTER"); - name = - static_cast(json_object_get_string_member(root,"name")); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = 
json_object_get_array_member(root, "index");
-		unsigned int elements2 = json_array_get_length(array);
-		LOGI("range dim: size[%u]", elements2);
-		for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
-			if(static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
-				dimInfo.SetValidIndex(elem2);
-		}
-
-		try {
-			shapeType = OutputMetadata::GetSupportedType(root, "shape_type", OutputMetadata::supportedTensorShapes);
-			type = OutputMetadata::GetSupportedType(root, "type", supportedDispTypes);
-		} catch (const std::exception& e) {
-			LOGE("Invalid %s", e.what());
-			return MEDIA_VISION_ERROR_INVALID_OPERATION;
-		}
+		offsetVec.ParseOffset(root, mSupportedShapeType);
 
 		LOGI("LEAVE");
 		return MEDIA_VISION_ERROR_NONE;
@@ -946,32 +632,10 @@ namespace inference
 		for (auto& disp : dispVecs) {
 			JsonNode *pNode = json_array_get_element(rootArray, elem++);
 			JsonObject *pObject = json_node_get_object(pNode);
-			disp.ParseDisplacement(pObject);
-		}
-
-		LOGI("LEAVE");
-		return MEDIA_VISION_ERROR_NONE;
-	}
-
-	int Edge::ParseEdge(JsonObject *root)
-	{
-		LOGI("ENTER");
-		JsonArray * rootArray = json_object_get_array_member(root, "edgemap");
-		unsigned int elements = json_array_get_length(rootArray);
-
-		// TODO: handling error
-		int pEdgeNode, cEdgeNode;
-		for (unsigned int elem = 0; elem < elements; ++elem) {
-
-			JsonNode *pNode = json_array_get_element(rootArray, elem);
-			JsonObject *pObject = json_node_get_object(pNode);
-			pEdgeNode = json_object_get_int_member(pObject, "parent");
-			cEdgeNode = json_object_get_int_member(pObject, "child");
-
-			edges.push_back(std::make_pair(pEdgeNode, cEdgeNode));
-			LOGI("%ud: parent - child: %d - %d", elem, pEdgeNode, cEdgeNode);
+			disp.ParseDisplacement(pObject, mSupportedShapeType);
 		}
+
 		LOGI("LEAVE");
 		return MEDIA_VISION_ERROR_NONE;
 	}
@@ -1084,22 +748,5 @@ namespace inference
 		return MEDIA_VISION_ERROR_NONE;
 	}
-
-	void DimInfo::SetValidIndex(int index)
-	{
-		LOGI("ENTER");
-
-		dims.push_back(index);
-
-		LOGI("LEAVE");
-	}
-
-	std::vector<int> DimInfo::GetValidIndexAll() const
-	{
-		LOGI("ENTER");
-
-		LOGI("LEAVE");
-		return dims;
-	}
 } /* Inference */
 } /* MediaVision */
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec
index 205f377..789842c 100644
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -1,6 +1,6 @@
 Name:        capi-media-vision
 Summary:     Media Vision library for Tizen Native API
-Version:     0.8.18
+Version:     0.8.19
 Release:     1
 Group:       Multimedia/Framework
 License:     Apache-2.0 and BSD-3-Clause
-- 
2.7.4


From 1667c0828c5e104db8a7d0523cb8593573f5c8ac Mon Sep 17 00:00:00 2001
From: Inki Dae
Date: Mon, 18 Oct 2021 16:06:27 +0900
Subject: [PATCH 06/16] mv_machine_learning: code refactoring to property
 parsing

Refactored the property parsing of the BoxInfo and Landmark classes.

What this patch does:
- Move the ParseLabel, ParseNumber and ParseDecodeInfo functions from
  the OutputMetadata class to the BoxInfo class, because these
  properties belong to BoxInfo, not OutputMetadata.
- Move the ParseDisplacement, ParseEdgeMap and ParseDecodeInfo
  functions from the OutputMetadata class to the Landmark class,
  because these properties belong to Landmark, not OutputMetadata.

This is just one step toward the next round of code refactoring.
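For illustration, the intended call flow after this change looks roughly
like the sketch below. Not all of the dispatch points inside
OutputMetadata are visible in this patch, so the function body shown is
a simplified assumption for illustration only, not the literal patch
content:

    // Hypothetical sketch: OutputMetadata stays a thin dispatcher while
    // the class that owns the data does the JSON parsing itself.
    int OutputMetadata::ParseBox(JsonObject *root)
    {
        // BoxInfo now parses its own "label" and "number" properties
        // in addition to the "box" property itself.
        int ret = box.ParseBox(root);
        if (ret != MEDIA_VISION_ERROR_NONE)
            return ret;

        // Whether a missing "label" is fatal is up to the caller; the
        // moved ParseLabel() reports it as MEDIA_VISION_ERROR_INVALID_OPERATION.
        if (box.ParseLabel(root) != MEDIA_VISION_ERROR_NONE)
            LOGI("No label property. Skipping it");

        return box.ParseNumber(root);
    }

Keeping each Parse*() next to the members it fills means OutputMetadata
no longer needs to know the JSON layout of box- or landmark-specific
properties.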
Change-Id: I0a17ee29f492bee53acacab339ba746a44495790 Signed-off-by: Inki Dae --- .../mv_inference/inference/include/BoxInfo.h | 150 ++++++++++++ .../mv_inference/inference/include/DecodeInfo.h | 1 + .../mv_inference/inference/include/Landmark.h | 113 +++++++++ .../inference/include/OutputMetadata.h | 33 +-- .../mv_inference/inference/include/ScoreInfo.h | 1 + .../mv_inference/inference/src/Inference.cpp | 10 +- .../mv_inference/inference/src/ObjectDecoder.cpp | 14 +- .../mv_inference/inference/src/OutputMetadata.cpp | 259 +-------------------- .../mv_inference/inference/src/PoseDecoder.cpp | 20 +- 9 files changed, 300 insertions(+), 301 deletions(-) diff --git a/mv_machine_learning/mv_inference/inference/include/BoxInfo.h b/mv_machine_learning/mv_inference/inference/include/BoxInfo.h index ceffaa0..63bc28d 100644 --- a/mv_machine_learning/mv_inference/inference/include/BoxInfo.h +++ b/mv_machine_learning/mv_inference/inference/include/BoxInfo.h @@ -22,15 +22,32 @@ #include #include +#include #include #include +#include "DecodeInfo.h" +#include "DimInfo.h" +#include "Utils.h" + namespace mediavision { namespace inference { namespace box { + struct Label + { + std::string name; + DimInfo dimInfo; + }; + + struct Number + { + std::string name; + DimInfo dimInfo; + }; + class BoxInfo { private: @@ -41,6 +58,8 @@ namespace box inference_box_coordinate_type_e coordinate; // 0: ratio, 1: pixel inference_box_decoding_type_e decodingType; // 0: bypass , 1:ssd with anchor DecodeInfo decodingInfo; + Label label; + Number number; std::map supportedBoxTypes; std::map supportedBoxCoordinateTypes; @@ -76,6 +95,9 @@ namespace box std::vector GetOrder() { return order; } int GetCoordinate() { return coordinate; } DecodeInfo& GetDecodeInfo() {return decodingInfo; } + std::string GetLabelName() { return label.name; } + std::string GetNumberName() { return number.name; } + DimInfo GetNumberDimInfo() { return number.dimInfo; } int ParseBox(JsonObject *root) { @@ -123,6 +145,134 @@ namespace box LOGI("LEAVE"); return MEDIA_VISION_ERROR_NONE; } + + int ParseLabel(JsonObject *root) + { + LOGI("ENTER"); + + if (!json_object_has_member(root, "label")) { + LOGE("No box outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonArray * rootArray = json_object_get_array_member(root, "label"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + label.name = json_object_get_string_member(pObject,"name"); + LOGI("layer: %s", label.name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + label.dimInfo.SetValidIndex(elem2); + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int ParseNumber(JsonObject *root) + { + LOGI("ENTER"); + + if (!json_object_has_member(root, "number")) { + LOGE("No number outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + // box + JsonArray * rootArray = json_object_get_array_member(root, "number"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + 
JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + number.name = json_object_get_string_member(pObject,"name"); + + LOGI("layer: %s", number.name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + + LOGI("range dim: size[%u]", elements2); + + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + number.dimInfo.SetValidIndex(elem2); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int ParseDecodeInfo(JsonObject *root) + { + LOGI("ENTER"); + + // box + JsonArray * rootArray = json_object_get_array_member(root, "box"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + if (!json_object_has_member(pObject, "decoding_info")) { + LOGE("decoding_info is mandatory. Invalid metadata"); + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info"); + if (!json_object_has_member(cObject, "anchor")) { + LOGE("anchor is mandatory. Invalid metadata"); + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + int ret = GetDecodeInfo().ParseAnchorParam(cObject); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to ParseAnchorParam[%d]", ret); + return ret; + } + + ret = GetDecodeInfo().ParseNms(cObject); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to ParseNms[%d]", ret); + return ret; + } + + ret = GetDecodeInfo().ParseRotate(cObject); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to ParseRotate[%d]", ret); + return ret; + } + + ret = GetDecodeInfo().ParseRoiOption(cObject); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to ParseRoiOption[%d]", ret); + return ret; + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } }; } /* box */ } /* Inference */ diff --git a/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h b/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h index a872c3a..7cdbca4 100644 --- a/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h +++ b/mv_machine_learning/mv_inference/inference/include/DecodeInfo.h @@ -24,6 +24,7 @@ #include #include +#include "Utils.h" namespace mediavision { diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h index 2fe6c9a..26dda7f 100644 --- a/mv_machine_learning/mv_inference/inference/include/Landmark.h +++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h @@ -20,7 +20,12 @@ #include #include #include + #include +#include +#include "DimInfo.h" +#include "DispVec.h" +#include "Utils.h" /** * @file Landmark.h @@ -67,6 +72,8 @@ namespace inference 1: decoding heatmap, 2: decoding heatmap with refinement */ HeatMapInfo heatMapInfo; + std::vector dispVecs; + Edge edgeMap; std::map supportedLandmarkTypes; std::map supportedLandmarkCoordinateTypes; @@ -139,6 +146,108 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } + int ParseDisplacement(JsonObject *root, + const std::map& supportedShapeType) + { + LOGI("ENTER"); + + if (!json_object_has_member(root, "displacement")) { + LOGI("No displacement outputmetadata"); + LOGI("LEAVE"); + return 
MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonArray * rootArray = json_object_get_array_member(root, "displacement"); + unsigned int elements = json_array_get_length(rootArray); + + dispVecs.resize(elements); + unsigned int elem = 0; + for (auto& disp : dispVecs) { + JsonNode *pNode = json_array_get_element(rootArray, elem++); + JsonObject *pObject = json_node_get_object(pNode); + + disp.ParseDisplacement(pObject, supportedShapeType); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int ParseEdgeMap(JsonObject * root) + { + LOGI("ENTER"); + + if (!json_object_has_member(root, "edgemap")) { + LOGI("No edgemap outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + edgeMap.ParseEdge(root); + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int ParseDecodeInfo(JsonObject *root, + const std::map& supportedShapeType) + { + LOGI("ENTER"); + + // box + JsonArray * rootArray = json_object_get_array_member(root, "landmark"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + if (!json_object_has_member(pObject, "decoding_info")) { + LOGE("decoding_info is mandatory. Invalid metadata"); + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info"); + if (!json_object_has_member(cObject, "heatmap")) { + LOGE("heatmap is mandatory. Invalid metadata"); + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonObject *object = json_object_get_object_member(cObject, "heatmap") ; + try { + GetHeatMapInfo().shapeType = GetSupportedType(object, "shape_type", supportedShapeType); + } catch (const std::exception& e) { + LOGE("Invalid %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + std::vector heatMapIndexes = GetDimInfo().GetValidIndexAll(); + if (GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { + GetHeatMapInfo().cIdx = heatMapIndexes[0]; + GetHeatMapInfo().hIdx = heatMapIndexes[1]; + GetHeatMapInfo().wIdx = heatMapIndexes[2]; + } else { + GetHeatMapInfo().hIdx = heatMapIndexes[0]; + GetHeatMapInfo().wIdx = heatMapIndexes[1]; + GetHeatMapInfo().cIdx = heatMapIndexes[2]; + } + + if (json_object_has_member(object, "nms_radius")) { + GetHeatMapInfo().nmsRadius = static_cast(json_object_get_double_member(object, "nms_radius")); + LOGI("nms is enabled with %3.f", GetHeatMapInfo().nmsRadius ); + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + inference_landmark_type_e GetType() { return type; @@ -167,6 +276,10 @@ namespace inference std::string GetName() { return name; } DimInfo GetDimInfo() { return dimInfo; } + + std::vector& GetDispVecAll() { return dispVecs; } + + std::vector>& GetEdges() { return edgeMap.GetEdgesAll(); } }; } /* Inference */ } /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h index 6724526..9385aa7 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -46,42 +46,20 @@ namespace mediavision { namespace inference { - struct Label - { - std::string name; - DimInfo dimInfo; - }; - - struct Number - { - std::string name; - DimInfo dimInfo; - }; - 
class OutputMetadata { private: bool parsed; ScoreInfo score; box::BoxInfo box; - Label label; - Number number; Landmark landmark; OffsetVec offsetVec; - std::vector dispVecs; - Edge edgeMap; std::map mSupportedShapeType; int ParseScore(JsonObject *root); int ParseBox(JsonObject *root); - int ParseLabel(JsonObject *root); - int ParseNumber(JsonObject *root); - int ParseBoxDecodeInfo(JsonObject *root); int ParseLandmark(JsonObject *root); - int ParseLandmarkDecodeInfo(JsonObject *root); int ParseOffset(JsonObject *root); - int ParseDisplacement(JsonObject *root); - int ParseEdgeMap(JsonObject * root); public: /** @@ -120,10 +98,11 @@ namespace inference std::vector GetBoxOrder() { return box.GetOrder(); } box::DecodeInfo& GetBoxDecodeInfo() { return box.GetDecodeInfo(); } inference_box_type_e GetBoxType() { return box.GetType(); } + std::string GetBoxLabelName() { return box.GetLabelName(); } + std::string GetBoxNumberName() { return box.GetNumberName(); } + DimInfo GetBoxNumberDimInfo() { return box.GetNumberDimInfo(); } + int GetScoreCoordinate() { return box.GetCoordinate(); } - std::string GetLabelName() { return label.name; } - std::string GetNumberName() { return number.name; } - DimInfo GetNumberDimInfo() { return number.dimInfo; } std::string GetLandmarkName() { return landmark.GetName(); } int GetLandmarkOffset() { return landmark.GetOffset(); } inference_landmark_type_e GetLandmarkType() { return landmark.GetType(); } @@ -131,10 +110,10 @@ namespace inference HeatMapInfo& GetLandmarkHeatMapInfo() { return landmark.GetHeatMapInfo(); } inference_landmark_coorindate_type_e GetLandmarkCoordinate() { return landmark.GetCoordinate(); } inference_landmark_decoding_type_e GetLandmarkDecodingType() { return landmark.GetDecodingType(); } + std::vector& GetLandmarkDispVecAll() { return landmark.GetDispVecAll(); } + std::vector>& GetLandmarkEdges() { return landmark.GetEdges(); } std::string GetOffsetVecName() { return offsetVec.GetName(); } inference_box_decoding_type_e GetBoxDecodingType() { return box.GetDecodingType(); } - std::vector& GetDispVecAll() { return dispVecs; } - std::vector>& GetEdges() { return edgeMap.GetEdgesAll(); } }; } /* Inference */ } /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h b/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h index 24180d7..e3e3393 100644 --- a/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h +++ b/mv_machine_learning/mv_inference/inference/include/ScoreInfo.h @@ -24,6 +24,7 @@ #include #include "DimInfo.h" +#include "Utils.h" namespace mediavision { diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index bf9a102..0308e49 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -451,11 +451,11 @@ namespace inference if (!outputMeta.GetBoxName().empty()) mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxName()); - if (!outputMeta.GetLabelName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetLabelName()); + if (!outputMeta.GetBoxLabelName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxLabelName()); - if (!outputMeta.GetNumberName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetNumberName()); + if (!outputMeta.GetBoxNumberName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxNumberName()); if (!outputMeta.GetLandmarkName().empty()) 
mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmarkName()); @@ -463,7 +463,7 @@ namespace inference if (!outputMeta.GetOffsetVecName().empty()) mConfig.mOutputLayerNames.push_back(outputMeta.GetOffsetVecName()); - for (auto& dispVec : outputMeta.GetDispVecAll()) { + for (auto& dispVec : outputMeta.GetLandmarkDispVecAll()) { mConfig.mOutputLayerNames.push_back(dispVec.GetName()); } } diff --git a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp index 4d5e36e..e631ff0 100755 --- a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp @@ -28,15 +28,15 @@ namespace inference int ObjectDecoder::init() { if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { - if (!mTensorBuffer.exist(mMeta.GetLabelName()) || - !mTensorBuffer.exist(mMeta.GetNumberName()) ) { + if (!mTensorBuffer.exist(mMeta.GetBoxLabelName()) || + !mTensorBuffer.exist(mMeta.GetBoxNumberName()) ) { LOGE("buffer buffers named of %s or %s are NULL", - mMeta.GetLabelName().c_str(), mMeta.GetNumberName().c_str()); + mMeta.GetBoxLabelName().c_str(), mMeta.GetBoxNumberName().c_str()); return MEDIA_VISION_ERROR_INVALID_OPERATION; } - std::vector indexes = mMeta.GetNumberDimInfo().GetValidIndexAll(); + std::vector indexes = mMeta.GetBoxNumberDimInfo().GetValidIndexAll(); if (indexes.size() != 1) { LOGE("Invalid dim size. It should be 1"); return MEDIA_VISION_ERROR_INVALID_OPERATION; @@ -45,7 +45,7 @@ namespace inference // mNumberOfObjects is set again if INFERENCE_BOX_DECODING_TYPE_BYPASS. // Otherwise it is set already within ctor. mNumberOfOjects = mTensorBuffer.getValue( - mMeta.GetNumberName(), indexes[0]); + mMeta.GetBoxNumberName(), indexes[0]); } else { if (mMeta.GetBoxDecodeInfo().IsAnchorBoxEmpty()) { LOGE("Anchor boxes are required but empty."); @@ -101,9 +101,9 @@ namespace inference } Box box = { - .index = mMeta.GetLabelName().empty() ? + .index = mMeta.GetBoxLabelName().empty() ? 
label : - mTensorBuffer.getValue(mMeta.GetLabelName(), idx), + mTensorBuffer.getValue(mMeta.GetBoxLabelName(), idx), .score = score, .location = cv::Rect2f(cx, cy, cWidth, cHeight) }; diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index 8a1362a..738116f 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -35,12 +35,8 @@ namespace inference parsed(false), score(), box(), - label(), - number(), landmark(), - offsetVec(), - dispVecs(), - edgeMap() + offsetVec() { // shape_type mSupportedShapeType.insert({"NCHW", INFERENCE_TENSOR_SHAPE_NCHW}); @@ -87,141 +83,6 @@ namespace inference return box.ParseBox(root); } - int OutputMetadata::ParseLabel(JsonObject *root) - { - LOGI("ENTER"); - - if (!json_object_has_member(root, "label")) { - LOGE("No box outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - JsonArray * rootArray = json_object_get_array_member(root, "label"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - label.name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", label.name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - label.dimInfo.SetValidIndex(elem2); - } - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int OutputMetadata::ParseNumber(JsonObject *root) - { - LOGI("ENTER"); - - if (!json_object_has_member(root, "number")) { - LOGE("No number outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - // box - JsonArray * rootArray = json_object_get_array_member(root, "number"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - number.name = json_object_get_string_member(pObject,"name"); - - LOGI("layer: %s", number.name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - - LOGI("range dim: size[%u]", elements2); - - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - number.dimInfo.SetValidIndex(elem2); - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int OutputMetadata::ParseBoxDecodeInfo(JsonObject *root) - { - LOGI("ENTER"); - - if (!json_object_has_member(root, "box")) { - LOGE("No box outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - // box - JsonArray * rootArray = json_object_get_array_member(root, "box"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - if 
(!json_object_has_member(pObject, "decoding_info")) { - LOGE("decoding_info is mandatory. Invalid metadata"); - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info"); - if (!json_object_has_member(cObject, "anchor")) { - LOGE("anchor is mandatory. Invalid metadata"); - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - int ret = box.GetDecodeInfo().ParseAnchorParam(cObject); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to ParseAnchorParam[%d]", ret); - return ret; - } - - ret = box.GetDecodeInfo().ParseNms(cObject); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to ParseNms[%d]", ret); - return ret; - } - - ret = box.GetDecodeInfo().ParseRotate(cObject); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to ParseRotate[%d]", ret); - return ret; - } - - ret = box.GetDecodeInfo().ParseRoiOption(cObject); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to ParseRoiOption[%d]", ret); - return ret; - } - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - - } - int DecodeInfo::ParseAnchorParam(JsonObject *root) { JsonObject *object = json_object_get_object_member(root, "anchor") ; @@ -534,70 +395,6 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int OutputMetadata::ParseLandmarkDecodeInfo(JsonObject *root) - { - LOGI("ENTER"); - - if (!json_object_has_member(root, "landmark")) { - LOGI("No landmark outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - // box - JsonArray * rootArray = json_object_get_array_member(root, "landmark"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - if (!json_object_has_member(pObject, "decoding_info")) { - LOGE("decoding_info is mandatory. Invalid metadata"); - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info"); - if (!json_object_has_member(cObject, "heatmap")) { - LOGE("heatmap is mandatory. 
Invalid metadata"); - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - JsonObject *object = json_object_get_object_member(cObject, "heatmap") ; - try { - landmark.GetHeatMapInfo().shapeType = GetSupportedType(object, "shape_type", mSupportedShapeType); - } catch (const std::exception& e) { - LOGE("Invalid %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - std::vector heatMapIndexes = landmark.GetDimInfo().GetValidIndexAll(); - if (landmark.GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { - landmark.GetHeatMapInfo().cIdx = heatMapIndexes[0]; - landmark.GetHeatMapInfo().hIdx = heatMapIndexes[1]; - landmark.GetHeatMapInfo().wIdx = heatMapIndexes[2]; - } else { - landmark.GetHeatMapInfo().hIdx = heatMapIndexes[0]; - landmark.GetHeatMapInfo().wIdx = heatMapIndexes[1]; - landmark.GetHeatMapInfo().cIdx = heatMapIndexes[2]; - } - - if (json_object_has_member(object, "nms_radius")) { - landmark.GetHeatMapInfo().nmsRadius = static_cast(json_object_get_double_member(object, "nms_radius")); - LOGI("nms is enabled with %3.f", landmark.GetHeatMapInfo().nmsRadius ); - } - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - int OutputMetadata::ParseOffset(JsonObject *root) { LOGI("ENTER"); @@ -614,48 +411,6 @@ namespace inference return MEDIA_VISION_ERROR_NONE; } - int OutputMetadata::ParseDisplacement(JsonObject *root) - { - LOGI("ENTER"); - - if (!json_object_has_member(root, "displacement")) { - LOGI("No displacement outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - JsonArray * rootArray = json_object_get_array_member(root, "displacement"); - unsigned int elements = json_array_get_length(rootArray); - - dispVecs.resize(elements); - unsigned int elem = 0; - for (auto& disp : dispVecs) { - JsonNode *pNode = json_array_get_element(rootArray, elem++); - JsonObject *pObject = json_node_get_object(pNode); - - disp.ParseDisplacement(pObject, mSupportedShapeType); - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int OutputMetadata::ParseEdgeMap(JsonObject * root) - { - LOGI("ENTER"); - - if (!json_object_has_member(root, "edgemap")) { - LOGI("No edgemap outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - edgeMap.ParseEdge(root); - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - int OutputMetadata::Parse(JsonObject *root) { LOGI("ENTER"); @@ -676,20 +431,20 @@ namespace inference // addtional parsing is required according to decoding type if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) { - ret = ParseLabel(root); + ret = box.ParseLabel(root); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetLabel[%d]", ret); return ret; } - ret = ParseNumber(root); + ret = box.ParseNumber(root); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetNumber[%d]", ret); return ret; } } else if (box.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_SSD_ANCHOR) { - ret = ParseBoxDecodeInfo(root); + ret = box.ParseDecodeInfo(root); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetBoxDecodeInfo[%d]", ret); return ret; @@ -714,7 +469,7 @@ namespace inference if (!landmark.GetName().empty()) { if (landmark.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { - ret = ParseLandmarkDecodeInfo(root); + ret = landmark.ParseDecodeInfo(root, mSupportedShapeType); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret); return ret; @@ -728,13 +483,13 @@ namespace inference return ret; } - ret = 
ParseDisplacement(root); + ret = landmark.ParseDisplacement(root, mSupportedShapeType); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetDispVector[%d]", ret); return ret; } - ret = ParseEdgeMap(root); + ret = landmark.ParseEdgeMap(root); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetEdgeConnection[%d]", ret); return ret; diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index a1efd2d..ca04829 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -366,9 +366,9 @@ namespace inference LOGI("KeyId: [%d], heatMap: %d, %d", root.id, root.heatMapLoc.x, root.heatMapLoc.y); LOGI("KeyId: [%d], decoded: %.4f, %.4f, score %.3f", root.id, root.decodedLoc.x, root.decodedLoc.y, root.score); - int index = static_cast(mMeta.GetEdges().size()) - 1; - for (auto riter = mMeta.GetEdges().rbegin(); - riter != mMeta.GetEdges().rend(); ++riter) { + int index = static_cast(mMeta.GetLandmarkEdges().size()) - 1; + for (auto riter = mMeta.GetLandmarkEdges().rbegin(); + riter != mMeta.GetLandmarkEdges().rend(); ++riter) { int fromKeyId = riter->second; int toKeyId = riter->first; @@ -387,8 +387,8 @@ namespace inference } index = 0; - for (auto iter = mMeta.GetEdges().begin(); - iter != mMeta.GetEdges().end(); ++iter) { + for (auto iter = mMeta.GetLandmarkEdges().begin(); + iter != mMeta.GetLandmarkEdges().end(); ++iter) { int fromKeyId = iter->first; int toKeyId = iter->second; @@ -459,16 +459,16 @@ namespace inference { LOGI("ENTER"); - LOGI("edge size: %zd", mMeta.GetEdges().size()); + LOGI("edge size: %zd", mMeta.GetLandmarkEdges().size()); int idxY = index.y * mHeatMapWidth - * static_cast(mMeta.GetEdges().size()) * 2; + * static_cast(mMeta.GetLandmarkEdges().size()) * 2; - idxY += index.x * static_cast(mMeta.GetEdges().size()) * 2 + edgeId; + idxY += index.x * static_cast(mMeta.GetLandmarkEdges().size()) * 2 + edgeId; - int idxX = idxY + static_cast(mMeta.GetEdges().size()); + int idxX = idxY + static_cast(mMeta.GetLandmarkEdges().size()); - for(auto& dispVec : mMeta.GetDispVecAll()){ + for(auto& dispVec : mMeta.GetLandmarkDispVecAll()){ if (dispVec.GetType() == type) { // 0: forward LOGI("%s", dispVec.GetName().c_str()); vector.x = mTensorBuffer.getValue(dispVec.GetName(), idxX); -- 2.7.4 From cc15518fce51c6e7e289701f2ccb07faddcc29ed Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Wed, 24 Nov 2021 17:39:23 +0900 Subject: [PATCH 07/16] mv_machine_learning: add SNPE inference engine support [Version] : 0.9.0-0 [Issue type] : new feature Change-Id: Iabfc4932d808296c8941bc8bbacf88b9fbe09616 Signed-off-by: Inki Dae --- include/mv_inference_type.h | 2 ++ mv_machine_learning/mv_inference/inference/src/Inference.cpp | 7 +++++++ packaging/capi-media-vision.spec | 4 ++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/mv_inference_type.h b/include/mv_inference_type.h index a259b77..5a6572d 100644 --- a/include/mv_inference_type.h +++ b/include/mv_inference_type.h @@ -65,6 +65,8 @@ typedef enum { MV_INFERENCE_BACKEND_ARMNN, /**< ARMNN (Since 6.0) */ MV_INFERENCE_BACKEND_MLAPI, /**< ML Single API of NNStreamer (Since 6.0) */ MV_INFERENCE_BACKEND_ONE, /**< On-device Neural Engine (Since 6.0) */ + MV_INFERENCE_BACKEND_NNTRAINER, /**< NNTrainer (Since 7.0) */ + MV_INFERENCE_BACKEND_SNPE, /**< SNPE Engine (Since 7.0) */ MV_INFERENCE_BACKEND_MAX /**< Backend MAX */ } mv_inference_backend_type_e; diff --git 
a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index 0308e49..fdd0560 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -81,6 +81,8 @@ namespace inference { LOGI("ENTER"); + // Mediavision can support several inference engines via ML Single API + // "mlapi" means that the inference backend is used via ML Single API. mSupportedInferenceBackend.insert(std::make_pair( MV_INFERENCE_BACKEND_OPENCV, std::make_pair("opencv", false))); mSupportedInferenceBackend.insert(std::make_pair( @@ -91,6 +93,8 @@ namespace inference MV_INFERENCE_BACKEND_MLAPI, std::make_pair("mlapi", false))); mSupportedInferenceBackend.insert(std::make_pair( MV_INFERENCE_BACKEND_ONE, std::make_pair("mlapi", false))); + mSupportedInferenceBackend.insert(std::make_pair( + MV_INFERENCE_BACKEND_SNPE, std::make_pair("mlapi", false))); CheckSupportedInferenceBackend(); @@ -115,6 +119,8 @@ namespace inference std::make_pair("onnx", INFERENCE_MODEL_ONNX)); mModelFormats.insert(std::make_pair( "nb", INFERENCE_MODEL_VIVANTE)); + mModelFormats.insert(std::make_pair( + "dlc", INFERENCE_MODEL_SNPE)); LOGI("LEAVE"); } @@ -938,6 +944,7 @@ namespace inference break; case INFERENCE_MODEL_TFLITE: case INFERENCE_MODEL_TORCH: + case INFERENCE_MODEL_SNPE: models.push_back(mConfig.mWeightFilePath); break; default: diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 789842c..1af9b65 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,7 +1,7 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.8.19 -Release: 1 +Version: 0.9.0 +Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause Source0: %{name}-%{version}.tar.gz -- 2.7.4 From 1cae83e2c74cefc4e5fe1cb2bd88ad984e67c39a Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Tue, 28 Dec 2021 19:31:33 +0900 Subject: [PATCH 08/16] test/machine_learning: add snpe model support [Version] : 0.10.0-0 [Issue type] : new feature Added a test case for the SNPE engine with a dlc model.
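For reference, on the legacy (non-JSON) path the SNPE backend is selected the same way as the TFLITE one, just with a different backend type. A minimal sketch of what the new helper configures (the attribute values mirror the quantized InceptionV3 test case below; the snippet is illustrative, not a verbatim excerpt):

    // Select the SNPE backend and run it on the CPU.
    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE,
                                       MV_INFERENCE_BACKEND_SNPE);
    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE,
                                       MV_INFERENCE_TARGET_CPU);
    // Quantized .dlc models take UINT8 input tensors.
    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE,
                                       MV_INFERENCE_DATA_UINT8);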
Change-Id: I7d20f9974300130ddeaf4e8eb77482d89dee0b9d Signed-off-by: Inki Dae --- packaging/capi-media-vision.spec | 2 +- .../inference/test_face_detection.cpp | 2 +- .../inference/test_image_classification.cpp | 41 ++++++++++++++++++++++ .../inference/test_inference_helper.cpp | 28 +++++++++++++-- .../inference/test_inference_helper.hpp | 7 +++- .../inference/test_pose_landmark_detection.cpp | 2 +- 6 files changed, 75 insertions(+), 7 deletions(-) diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 1af9b65..00147d8 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.9.0 +Version: 0.10.0 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause diff --git a/test/testsuites/machine_learning/inference/test_face_detection.cpp b/test/testsuites/machine_learning/inference/test_face_detection.cpp index 376a717..59a357f 100644 --- a/test/testsuites/machine_learning/inference/test_face_detection.cpp +++ b/test/testsuites/machine_learning/inference/test_face_detection.cpp @@ -40,7 +40,7 @@ public: TEST_P(TestFaceDetection, CPU_TFLITE_MobilenetV1_SSD) { - engine_config_hosted_cpu_tflite(engine_cfg, + engine_config_hosted_model_config(engine_cfg, FD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH, _use_json_parser); if (!_use_json_parser) { const char *inputNodeName = "normalized_input_image_tensor"; diff --git a/test/testsuites/machine_learning/inference/test_image_classification.cpp b/test/testsuites/machine_learning/inference/test_image_classification.cpp index cdda3f6..103f6df 100644 --- a/test/testsuites/machine_learning/inference/test_image_classification.cpp +++ b/test/testsuites/machine_learning/inference/test_image_classification.cpp @@ -31,6 +31,13 @@ MV_CONFIG_PATH \ "/models/IC/tflite/quant_mobilenet_v1_224x224.tflite" +#define IC_LABEL_INCEPTION_V3_299_PATH \ + MV_CONFIG_PATH \ + "/models/IC_Q/snpe/imagenet_slim_labels.txt" +#define IC_SNPE_WEIGHT_QUANT_INCEPTION_V3_299_PATH \ + MV_CONFIG_PATH \ + "/models/IC_Q/snpe/inception_v3_quantized.dlc" + void _image_classified_cb(mv_source_h source, const int number_of_classes, const int *indices, const char **names, const float *confidences, void *user_data) @@ -286,6 +293,40 @@ TEST_P(TestImageClassification, CPU_TFLITE_QUANT_MobilenetV1) inferenceBanana(); } +TEST_P(TestImageClassification, SNPE_InceptionV3_Quantized) +{ + engine_config_hosted_cpu_snpe_user_model( + engine_cfg, IC_SNPE_WEIGHT_QUANT_INCEPTION_V3_299_PATH, + IC_LABEL_INCEPTION_V3_299_PATH, + _use_json_parser); + + if (!_use_json_parser) { + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "output" }; + + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_UINT8), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, 127.5), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_double_attribute(engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.0), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 299), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 299), + MEDIA_VISION_ERROR_NONE); + 
ASSERT_EQ(mv_engine_config_set_int_attribute(engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3), + MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_string_attribute(engine_cfg, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName), MEDIA_VISION_ERROR_NONE); + ASSERT_EQ(mv_engine_config_set_array_string_attribute(engine_cfg, MV_INFERENCE_OUTPUT_NODE_NAMES, + outputNodeName, 1), MEDIA_VISION_ERROR_NONE); + } + + inferenceBanana(); +} + INSTANTIATE_TEST_CASE_P(Prefix, TestImageClassification, ::testing::Values( ParamTypeOne(false), diff --git a/test/testsuites/machine_learning/inference/test_inference_helper.cpp b/test/testsuites/machine_learning/inference/test_inference_helper.cpp index 81a0380..9d5c95b 100644 --- a/test/testsuites/machine_learning/inference/test_inference_helper.cpp +++ b/test/testsuites/machine_learning/inference/test_inference_helper.cpp @@ -15,7 +15,7 @@ TestInference::~TestInference() EXPECT_EQ(mv_destroy_engine_config(engine_cfg), MEDIA_VISION_ERROR_NONE); } -void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, +void engine_config_hosted_model_config(mv_engine_config_h handle, const char *tf_weight, const bool use_json_parser) { @@ -32,6 +32,14 @@ void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, handle, MV_INFERENCE_MODEL_META_FILE_PATH , meta_file_path.c_str()), MEDIA_VISION_ERROR_NONE); } +} + +void engine_config_hosted_cpu_tflite_user_model(mv_engine_config_h handle, + const char *tf_weight, + const char *user_file, + const bool use_json_parser) +{ + engine_config_hosted_model_config(handle, tf_weight, use_json_parser); EXPECT_EQ(mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, @@ -41,14 +49,28 @@ void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU), MEDIA_VISION_ERROR_NONE); + + EXPECT_EQ(mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, user_file), + MEDIA_VISION_ERROR_NONE); } -void engine_config_hosted_cpu_tflite_user_model(mv_engine_config_h handle, +void engine_config_hosted_cpu_snpe_user_model(mv_engine_config_h handle, const char *tf_weight, const char *user_file, const bool use_json_parser) { - engine_config_hosted_cpu_tflite(handle, tf_weight, use_json_parser); + engine_config_hosted_model_config(handle, tf_weight, use_json_parser); + + EXPECT_EQ(mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_SNPE), + MEDIA_VISION_ERROR_NONE); + EXPECT_EQ(mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU), + MEDIA_VISION_ERROR_NONE); + EXPECT_EQ(mv_engine_config_set_string_attribute( handle, MV_INFERENCE_MODEL_USER_FILE_PATH, user_file), MEDIA_VISION_ERROR_NONE); diff --git a/test/testsuites/machine_learning/inference/test_inference_helper.hpp b/test/testsuites/machine_learning/inference/test_inference_helper.hpp index a04fb00..3023d81 100644 --- a/test/testsuites/machine_learning/inference/test_inference_helper.hpp +++ b/test/testsuites/machine_learning/inference/test_inference_helper.hpp @@ -23,7 +23,7 @@ public: mv_source_h mv_source; }; -void engine_config_hosted_cpu_tflite(mv_engine_config_h handle, +void engine_config_hosted_model_config(mv_engine_config_h handle, const char *tf_weight, const bool use_json_parser); @@ -32,4 +32,9 @@ void engine_config_hosted_cpu_tflite_user_model(mv_engine_config_h handle, const char *user_file, const bool use_json_parser); +void engine_config_hosted_cpu_snpe_user_model(mv_engine_config_h handle, 
+ const char *tf_weight, + const char *user_file, + const bool use_json_parser); + #endif //__TEST_INFERENCE_HELPER_HPP__ diff --git a/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp b/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp index 58c4b43..623903a 100644 --- a/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp +++ b/test/testsuites/machine_learning/inference/test_pose_landmark_detection.cpp @@ -39,7 +39,7 @@ public: TEST_P(TestPoseLandmarkDetection, CPU_TFLITE_MobilenetV1) { - engine_config_hosted_cpu_tflite( + engine_config_hosted_model_config( engine_cfg, PLD_TFLITE_WEIGHT_MOBILENET_V1_POSENET_257_PATH, _use_json_parser); if (!_use_json_parser) { -- 2.7.4 From a32be06de5a36e685309e056c556fd9b135a8786 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Fri, 31 Dec 2021 10:50:53 +0900 Subject: [PATCH 09/16] mv_machine_learning: deprecate MLAPI and MAX types [Version] : 0.11.0-0 [Issue type] : cleanup Deprecated the MV_INFERENCE_BACKEND_MLAPI and MV_INFERENCE_BACKEND_MAX types. MV_INFERENCE_BACKEND_MLAPI is not a backend type but a separate API framework, and MV_INFERENCE_BACKEND_MAX can break binary compatibility when a new enumeration value is added, because the MAX value then differs from the one an already-built binary was compiled against. So drop these two types. Change-Id: I391cd0d4b713e3d35fe263f1567f4cea3df60630 Signed-off-by: Inki Dae --- include/mv_inference_type.h | 6 +++--- .../mv_inference/inference/src/mv_inference_open.cpp | 2 ++ packaging/capi-media-vision.spec | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/mv_inference_type.h b/include/mv_inference_type.h index 5a6572d..1794b83 100644 --- a/include/mv_inference_type.h +++ b/include/mv_inference_type.h @@ -48,7 +48,7 @@ extern "C" { * run efficiently without modification on Embedded hardware. * (https://developer.arm.com/ip-products/processors/machine-learning/arm-nn) * #MV_INFERENCE_BACKEND_MLAPI Samsung-introduced open source ML single API framework of NNStreamer, which - * runs various NN models via tensor filters of NNStreamer. + * runs various NN models via tensor filters of NNStreamer. (Deprecated since 7.0) * (https://github.com/nnstreamer/nnstreamer) * #MV_INFERENCE_BACKEND_ONE Samsung-introduced open source inference engine called On-device Neural Engine, which * performs inference of a given NN model on various devices such as CPU, GPU, DSP and NPU.
@@ -63,11 +63,11 @@ typedef enum { MV_INFERENCE_BACKEND_OPENCV, /**< OpenCV */ MV_INFERENCE_BACKEND_TFLITE, /**< TensorFlow-Lite */ MV_INFERENCE_BACKEND_ARMNN, /**< ARMNN (Since 6.0) */ - MV_INFERENCE_BACKEND_MLAPI, /**< ML Single API of NNStreamer (Since 6.0) */ + MV_INFERENCE_BACKEND_MLAPI, /**< @deprecated ML Single API of NNStreamer (Deprecated since 7.0) */ MV_INFERENCE_BACKEND_ONE, /**< On-device Neural Engine (Since 6.0) */ MV_INFERENCE_BACKEND_NNTRAINER, /**< NNTrainer (Since 7.0) */ MV_INFERENCE_BACKEND_SNPE, /**< SNPE Engine (Since 7.0) */ - MV_INFERENCE_BACKEND_MAX /**< Backend MAX */ + MV_INFERENCE_BACKEND_MAX /**< @deprecated Backend MAX (Deprecated since 7.0) */ } mv_inference_backend_type_e; /** diff --git a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp index a9fd490..5faa3ad 100644 --- a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp +++ b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp @@ -125,6 +125,8 @@ static bool IsValidBackendType(const int backend_type) static bool IsConfigFilePathRequired(const int target_device_type, const int backend_type) { + LOGW("DEPRECATION WARNING : MV_INFERENCE_BACKEND_MLAPI type is deprecated and will be removed from next release."); + // In case of MV_INFERENCE_TARGET_DEVICE_CUSTOM via MLAPI backend, config file path is required. return (backend_type == MV_INFERENCE_BACKEND_MLAPI && target_device_type & MV_INFERENCE_TARGET_DEVICE_CUSTOM); diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 00147d8..50fcffc 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.10.0 +Version: 0.11.0 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause -- 2.7.4 From 5f741065916c718fd6a4b30f5532552cf6ef5d44 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Mon, 20 Dec 2021 18:01:24 +0900 Subject: [PATCH 10/16] inference: support movenet [Version] 0.12.0 [Issue type] new feature Movenet models in the Lightning3/4 and Thunder3/4 variants are supported. The models can be downloaded from tfhub.dev, and meta files for them are added as examples. In addition, the testsuite is updated with those Movenet models.
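The new BYPASS_MULTICHANNEL decoding type reads keypoints straight out of the output tensor, which packs a [y, x, score] triplet per landmark (hence "landmark_offset" : 3 in the meta files). A condensed sketch of the decode loop, with names shortened for brevity (the full version is in the PoseDecoder.cpp hunk below):

    // Each landmark occupies `offset` consecutive floats: y, x, score.
    for (int idx = 0; idx < numberOfLandmarks; ++idx) {
        float y     = buffer.getValue<float>(name, idx * offset);
        float x     = buffer.getValue<float>(name, idx * offset + 1);
        float score = buffer.getValue<float>(name, idx * offset + 2);

        // Coordinates come out in input-tensor scale; map them back to the
        // source image ratio before reporting the landmark.
        landmarks[idx].decodedLoc = cv::Point2f(x / scaleWidth, y / scaleHeight);
        landmarks[idx].score = score;
    }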
Change-Id: I1c526cba688842e408fad1c84ba7dff4d0320e80 Signed-off-by: Tae-Young Chung --- meta-template/pld_movenet_lightning3_192x192.json | 46 ++++++++++++ .../pld_movenet_lightning3_192x192_int8_quant.json | 46 ++++++++++++ meta-template/pld_movenet_lightning4_192x192.json | 46 ++++++++++++ .../pld_movenet_lightning4_192x192_int8_quant.json | 46 ++++++++++++ meta-template/pld_movenet_thunder3_256x256.json | 46 ++++++++++++ .../pld_movenet_thunder3_256x256_int8_quant.json | 46 ++++++++++++ meta-template/pld_movenet_thunder4_256x256.json | 46 ++++++++++++ .../pld_movenet_thunder4_256x256_int8_quant.json | 46 ++++++++++++ .../mv_inference/inference/include/Landmark.h | 1 + .../inference/include/OutputMetadataTypes.h | 1 + .../mv_inference/inference/src/Inference.cpp | 7 +- .../mv_inference/inference/src/OutputMetadata.cpp | 3 +- .../mv_inference/inference/src/PoseDecoder.cpp | 24 ++++++- packaging/capi-media-vision.spec | 2 +- .../inference/inference_test_suite.c | 84 ++++++++++++++++++++++ 15 files changed, 484 insertions(+), 6 deletions(-) create mode 100644 meta-template/pld_movenet_lightning3_192x192.json create mode 100644 meta-template/pld_movenet_lightning3_192x192_int8_quant.json create mode 100644 meta-template/pld_movenet_lightning4_192x192.json create mode 100644 meta-template/pld_movenet_lightning4_192x192_int8_quant.json create mode 100644 meta-template/pld_movenet_thunder3_256x256.json create mode 100644 meta-template/pld_movenet_thunder3_256x256_int8_quant.json create mode 100644 meta-template/pld_movenet_thunder4_256x256.json create mode 100644 meta-template/pld_movenet_thunder4_256x256_int8_quant.json diff --git a/meta-template/pld_movenet_lightning3_192x192.json b/meta-template/pld_movenet_lightning3_192x192.json new file mode 100644 index 0000000..f40c6ff --- /dev/null +++ b/meta-template/pld_movenet_lightning3_192x192.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 192, 192, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_lightning3_192x192_int8_quant.json b/meta-template/pld_movenet_lightning3_192x192_int8_quant.json new file mode 100644 index 0000000..f40c6ff --- /dev/null +++ b/meta-template/pld_movenet_lightning3_192x192_int8_quant.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 192, 192, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : 
"BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_lightning4_192x192.json b/meta-template/pld_movenet_lightning4_192x192.json new file mode 100644 index 0000000..bcbf9d0 --- /dev/null +++ b/meta-template/pld_movenet_lightning4_192x192.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "serving_default_input_0:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 192, 192, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "StatefulPartitionedCall_0:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "StatefulPartitionedCall_0:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_lightning4_192x192_int8_quant.json b/meta-template/pld_movenet_lightning4_192x192_int8_quant.json new file mode 100644 index 0000000..bcbf9d0 --- /dev/null +++ b/meta-template/pld_movenet_lightning4_192x192_int8_quant.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "serving_default_input_0:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 192, 192, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "StatefulPartitionedCall_0:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "StatefulPartitionedCall_0:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_thunder3_256x256.json b/meta-template/pld_movenet_thunder3_256x256.json new file mode 100644 index 0000000..5d22c4a --- /dev/null +++ b/meta-template/pld_movenet_thunder3_256x256.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 256, 256, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_thunder3_256x256_int8_quant.json b/meta-template/pld_movenet_thunder3_256x256_int8_quant.json new file mode 100644 index 0000000..5d22c4a --- /dev/null +++ b/meta-template/pld_movenet_thunder3_256x256_int8_quant.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 256, 256, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + 
], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "Identity:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_thunder4_256x256.json b/meta-template/pld_movenet_thunder4_256x256.json new file mode 100644 index 0000000..d28d5c7 --- /dev/null +++ b/meta-template/pld_movenet_thunder4_256x256.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "serving_default_input:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 256, 256, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "StatefulPartitionedCall:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "StatefulPartitionedCall:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/meta-template/pld_movenet_thunder4_256x256_int8_quant.json b/meta-template/pld_movenet_thunder4_256x256_int8_quant.json new file mode 100644 index 0000000..d28d5c7 --- /dev/null +++ b/meta-template/pld_movenet_thunder4_256x256_int8_quant.json @@ -0,0 +1,46 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "serving_default_input:0", + "shape_type" : "NHWC", + "shape_dims" : [ 1, 256, 256, 3], + "data_type" : "FLOAT32", + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [0.0, 0.0, 0.0], + "std" : [1.0, 1.0, 1.0] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "StatefulPartitionedCall:0", + "index" : [-1, -1, -1, 1], + "top_number" : 1, + "threshold" : 0.5, + "score_type" : "NORMAL" + } + ], + "landmark" : [ + { + "name" : "StatefulPartitionedCall:0", + "index" : [-1, -1, 1, -1], + "landmark_type" : "2D_SINGLE", + "landmark_coordinate" : "RATIO", + "decoding_type" : "BYPASS_MULTICHANNEL", + "landmark_offset" : 3 + } + ] + } +} diff --git a/mv_machine_learning/mv_inference/inference/include/Landmark.h b/mv_machine_learning/mv_inference/inference/include/Landmark.h index 26dda7f..4aae027 100644 --- a/mv_machine_learning/mv_inference/inference/include/Landmark.h +++ b/mv_machine_learning/mv_inference/inference/include/Landmark.h @@ -99,6 +99,7 @@ namespace inference supportedLandmarkCoordinateTypes.insert({"PIXEL", INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL}); supportedLandmarkDecodingTypes.insert({"BYPASS", INFERENCE_LANDMARK_DECODING_TYPE_BYPASS}); + supportedLandmarkDecodingTypes.insert({"BYPASS_MULTICHANNEL", INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL}); supportedLandmarkDecodingTypes.insert({"HEATMAP", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP}); supportedLandmarkDecodingTypes.insert({"HEATMAP_REFINE", INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE}); } diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h 
b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h index 440fa76..7ce558b 100644 --- a/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadataTypes.h @@ -67,6 +67,7 @@ namespace inference typedef enum { INFERENCE_LANDMARK_DECODING_TYPE_BYPASS, + INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL, INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP, INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE } inference_landmark_decoding_type_e; diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp index fdd0560..8cb63c8 100755 --- a/mv_machine_learning/mv_inference/inference/src/Inference.cpp +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -1542,6 +1542,8 @@ namespace inference LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]); number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]] / outputMeta.GetLandmarkOffset(); + } else if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { + number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]]; } else { heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx]; heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx]; @@ -1635,7 +1637,8 @@ namespace inference int heatMapHeight = 0; int heatMapChannel = 0; - if (outputMeta.GetLandmarkDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP || + outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx]; heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx]; heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().cIdx]; @@ -1652,6 +1655,8 @@ namespace inference if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) landmarkChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]] / outputMeta.GetLandmarkOffset(); + else if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) + landmarkChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]]; poseResult->number_of_landmarks_per_pose = mUserListName.empty() ? 
landmarkChannel : static_cast(mUserListName.size()); diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp index 738116f..391b265 100755 --- a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp @@ -468,7 +468,8 @@ namespace inference } if (!landmark.GetName().empty()) { - if (landmark.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP || + landmark.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { ret = landmark.ParseDecodeInfo(root, mSupportedShapeType); if (ret != MEDIA_VISION_ERROR_NONE) { LOGE("Fail to GetLandmarkDecodeInfo[%d]", ret); diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index ca04829..e1596aa 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -56,7 +56,8 @@ namespace inference return MEDIA_VISION_ERROR_INVALID_OPERATION; } - if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS || + mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { LOGI("Skip init"); return MEDIA_VISION_ERROR_NONE; } @@ -245,14 +246,16 @@ namespace inference mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) { mPoseLandmarks.resize(1); - if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS || + mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks); } else { mPoseLandmarks[0].landmarks.resize(mHeatMapChannel); } } - if (mMeta.GetLandmarkDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) { + if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP || + mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_HEATMAP_REFINE) { while (!mCandidates.empty()) { LandmarkPoint &root = mCandidates.front(); @@ -311,6 +314,21 @@ namespace inference for (auto& pose : mPoseLandmarks) { pose.score /= static_cast(mHeatMapChannel); } + } else if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { + int landmarkOffset = mMeta.GetLandmarkOffset(); + for (int idx = 0; idx < mNumberOfLandmarks; ++idx) { + float py = mTensorBuffer.getValue(mMeta.GetLandmarkName(), idx * landmarkOffset); + float px = mTensorBuffer.getValue(mMeta.GetLandmarkName(), idx * landmarkOffset + 1); + float pscore = mTensorBuffer.getValue(mMeta.GetScoreName(), idx * landmarkOffset + 2); + + mPoseLandmarks[0].landmarks[idx].score = pscore; + mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1); + mPoseLandmarks[0].landmarks[idx].decodedLoc = cv::Point2f(px / scaleWidth, py / scaleHeight); + mPoseLandmarks[0].landmarks[idx].id = idx; + mPoseLandmarks[0].landmarks[idx].valid = true; + + LOGI("idx[%d]: %.4f, %.4f, score: %.4f", idx, px, py, pscore); + } } else { // multi pose is not supported std::vector scoreIndexes = mMeta.GetScoreDimInfo().GetValidIndexAll(); diff --git a/packaging/capi-media-vision.spec 
b/packaging/capi-media-vision.spec index 50fcffc..2aeff26 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.11.0 +Version: 0.12.0 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c index 188524d..681afab 100644 --- a/test/testsuites/machine_learning/inference/inference_test_suite.c +++ b/test/testsuites/machine_learning/inference/inference_test_suite.c @@ -224,6 +224,42 @@ #define PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH \ "/usr/share/capi-media-vision/models/PLD/tflite/pld_int8_movenet.tflite" +#define PLD_TFLITE_WEIGHT_MOVENET_THUNDER3_256_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder3_256x256.tflite" +#define PLD_TFLITE_META_MOVENET_THUNDER3_256_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder3_256x256.json" +#define PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING3_192_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning3_192x192.tflite" +#define PLD_TFLITE_META_MOVENET_LIGHTNING3_192_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning3_192x192.json" + +#define PLD_TFLITE_WEIGHT_MOVENET_THUNDER3_256_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder3_256x256_int8_quant.tflite" +#define PLD_TFLITE_META_MOVENET_THUNDER3_256_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder3_256x256_int8_quant.json" +#define PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING3_192_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning3_192x192_int8_quant.tflite" +#define PLD_TFLITE_META_MOVENET_LIGHTNING3_192_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning3_192x192_int8_quant.json" + +#define PLD_TFLITE_WEIGHT_MOVENET_THUNDER4_256_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder4_256x256.tflite" +#define PLD_TFLITE_META_MOVENET_THUNDER4_256_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder4_256x256.json" +#define PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING4_192_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning4_192x192.tflite" +#define PLD_TFLITE_META_MOVENET_LIGHTNING4_192_FLOAT32_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning4_192x192.json" + +#define PLD_TFLITE_WEIGHT_MOVENET_THUNDER4_256_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder4_256x256_int8_quant.tflite" +#define PLD_TFLITE_META_MOVENET_THUNDER4_256_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_thunder4_256x256_int8_quant.json" +#define PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING4_192_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning4_192x192_int8_quant.tflite" +#define PLD_TFLITE_META_MOVENET_LIGHTNING4_192_INT8_QUANT_PATH \ + "/usr/share/capi-media-vision/models/PLD/tflite/pld_movenet_lightning4_192x192_int8_quant.json" + #define TASK_IC 0 #define TASK_OD 1 #define TASK_FD 2 @@ -1622,6 +1658,14 @@ int perform_pose_landmark_detection() "Hosted[o]: TFLITE(CPU) + CPM", "Hosted[o]: TFLITE(CPU) + MOBILENET_V1_POSENET", "Hosted[x]: TFLITE(CPU) + 
INT8_MOVENET", + "Hosted[o]: TFLITE(CPU) + FLOAT32_MOVENET_THUNDER3", + "Hosted[o]: TFLITE(CPU) + FLOAT32_MOVENET_LIGHTNING3", + "Hosted[o]: TFLITE(CPU) + INT8_QUANT_MOVENET_THUNDER3", + "Hosted[o]: TFLITE(CPU) + INT8_QUANT_MOVENET_LIGHTNING3", + "Hosted[o]: TFLITE(CPU) + FLOAT32_MOVENET_THUNDER4", + "Hosted[o]: TFLITE(CPU) + FLOAT32_MOVENET_LIGHTNING4", + "Hosted[o]: TFLITE(CPU) + INT8_QUANT_MOVENET_THUNDER4", + "Hosted[o]: TFLITE(CPU) + INT8_QUANT_MOVENET_LIGHTNING4", }; int sel_opt = show_menu_linear("Select Action:", names, ARRAY_SIZE(names)); @@ -1655,6 +1699,46 @@ int perform_pose_landmark_detection() engine_cfg, PLD_TFLITE_WEIGHT_INT8_MOVENET_PATH, NULL); } break; + case 5: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_THUNDER3_256_FLOAT32_PATH, + PLD_TFLITE_META_MOVENET_THUNDER3_256_FLOAT32_PATH); + } break; + case 6: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING3_192_FLOAT32_PATH, + PLD_TFLITE_META_MOVENET_LIGHTNING3_192_FLOAT32_PATH); + } break; + case 7: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_THUNDER3_256_INT8_QUANT_PATH, + PLD_TFLITE_META_MOVENET_THUNDER3_256_INT8_QUANT_PATH); + } break; + case 8: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING3_192_INT8_QUANT_PATH, + PLD_TFLITE_META_MOVENET_LIGHTNING3_192_INT8_QUANT_PATH); + } break; + case 9: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_THUNDER4_256_FLOAT32_PATH, + PLD_TFLITE_META_MOVENET_THUNDER4_256_FLOAT32_PATH); + } break; + case 10: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING4_192_FLOAT32_PATH, + PLD_TFLITE_META_MOVENET_LIGHTNING4_192_FLOAT32_PATH); + } break; + case 11: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_THUNDER4_256_INT8_QUANT_PATH, + PLD_TFLITE_META_MOVENET_THUNDER4_256_INT8_QUANT_PATH); + } break; + case 12: { + err = engine_config_hosted_tflite_cpu( + engine_cfg, PLD_TFLITE_WEIGHT_MOVENET_LIGHTNING4_192_INT8_QUANT_PATH, + PLD_TFLITE_META_MOVENET_LIGHTNING4_192_INT8_QUANT_PATH); + } break; } if (err != MEDIA_VISION_ERROR_NONE) { printf("Fail to perform config [err:%i]\n", err); -- 2.7.4 From 53b0a9274aae3b2d06b27e62f3b0b1070a3a0423 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Tue, 11 Jan 2022 20:26:14 +0900 Subject: [PATCH 11/16] test/machine_learning: fix device attribute type [Version] : 0.12.1-0 [Issue type] : bug fix Fixed device attribute type. Legacy device attribute type has been deprecated so use new one. 
Change-Id: Ia31a5be01595e6d39f3dff783e5b4d0982987e86 Signed-off-by: Inki Dae --- packaging/capi-media-vision.spec | 2 +- .../machine_learning/inference/test_inference_helper.cpp | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 2aeff26..8af3d73 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.0 +Version: 0.12.1 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause diff --git a/test/testsuites/machine_learning/inference/test_inference_helper.cpp b/test/testsuites/machine_learning/inference/test_inference_helper.cpp index 9d5c95b..c041f48 100644 --- a/test/testsuites/machine_learning/inference/test_inference_helper.cpp +++ b/test/testsuites/machine_learning/inference/test_inference_helper.cpp @@ -46,8 +46,8 @@ void engine_config_hosted_cpu_tflite_user_model(mv_engine_config_h handle, MV_INFERENCE_BACKEND_TFLITE), MEDIA_VISION_ERROR_NONE); EXPECT_EQ(mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU), + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU), MEDIA_VISION_ERROR_NONE); EXPECT_EQ(mv_engine_config_set_string_attribute( @@ -67,11 +67,11 @@ void engine_config_hosted_cpu_snpe_user_model(mv_engine_config_h handle, MV_INFERENCE_BACKEND_SNPE), MEDIA_VISION_ERROR_NONE); EXPECT_EQ(mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU), + MV_INFERENCE_TARGET_DEVICE_TYPE, + MV_INFERENCE_TARGET_DEVICE_CPU), MEDIA_VISION_ERROR_NONE); EXPECT_EQ(mv_engine_config_set_string_attribute( handle, MV_INFERENCE_MODEL_USER_FILE_PATH, user_file), MEDIA_VISION_ERROR_NONE); -} \ No newline at end of file +} -- 2.7.4 From af72d3db5f2a1894756ba92a9427f5bd560aac91 Mon Sep 17 00:00:00 2001 From: Kwang Son Date: Wed, 19 Jan 2022 23:15:07 -0500 Subject: [PATCH 12/16] packaging: Change gcov object install path [Version] : 0.12.2-0 [Issue type] : feature request Change the gcov install path by appending the package name, for a better directory structure. Change-Id: Icf586a264e1f8a37db31c134e5e2ea9f82968e21 Signed-off-by: Kwang Son --- packaging/capi-media-vision.spec | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 8af3d73..70a2bc1 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.1 +Version: 0.12.2 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause @@ -229,8 +229,8 @@ cp media-vision-config.json %{buildroot}%{_datadir}/%{name}/ %make_install %if 0%{?gcov:1} -mkdir -p %{buildroot}%{_datadir}/gcov/obj -install -m 0644 gcov-obj/* %{buildroot}%{_datadir}/gcov/obj +mkdir -p %{buildroot}%{_datadir}/gcov/obj/%{name} +install -m 0644 gcov-obj/* %{buildroot}%{_datadir}/gcov/obj/%{name} %endif %post -p /sbin/ldconfig -- 2.7.4 From e3ebe78d9e3ac4512fca897da7c1e0eedf4752cc Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Fri, 21 Jan 2022 09:28:32 +0900 Subject: [PATCH 13/16] face/image: Add opencv's tracking header files [Version] 0.12.3-0 [Issue type] Update From OpenCV 4.5.3, the tracking module has been moved to contrib, and some trackers have been moved to the legacy API.
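Since the contrib split, the median-flow tracker lives in the legacy namespace of the tracking module, so code built against the old cv::TrackerMedianFlow needs the extra header and namespace. A rough sketch (assuming the opencv_contrib tracking headers are installed):

    // Legacy tracking API location since OpenCV 4.5.3.
    #include <opencv2/tracking/tracking_legacy.hpp>

    cv::Ptr<cv::legacy::TrackerMedianFlow> tracker =
            cv::legacy::TrackerMedianFlow::create();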
Change-Id: Ibdfcc89abf5f10a8d874da81ab3fc4601cec5423 Signed-off-by: Tae-Young Chung --- mv_face/face/include/FaceTracker.h | 3 ++- mv_image/image/include/Tracking/ObjectTracker.h | 1 + mv_surveillance/surveillance/include/MFTracker.h | 1 + packaging/capi-media-vision.spec | 2 +- 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/mv_face/face/include/FaceTracker.h b/mv_face/face/include/FaceTracker.h index 5266fea..210cc41 100644 --- a/mv_face/face/include/FaceTracker.h +++ b/mv_face/face/include/FaceTracker.h @@ -44,6 +44,7 @@ #include #include +#include namespace cv { @@ -59,7 +60,7 @@ by authors to outperform MIL). During the implementation period the code at , the courtesy of the author Arthur Amarra, was used for the reference purpose. */ -class FaceTracker : public TrackerMedianFlow { +class FaceTracker : public legacy::TrackerMedianFlow { public: struct Params { /** diff --git a/mv_image/image/include/Tracking/ObjectTracker.h b/mv_image/image/include/Tracking/ObjectTracker.h index e3e6894..9486b51 100644 --- a/mv_image/image/include/Tracking/ObjectTracker.h +++ b/mv_image/image/include/Tracking/ObjectTracker.h @@ -18,6 +18,7 @@ #define __MEDIA_VISION_OBJECTTRACKER_H__ #include +#include namespace MediaVision { namespace Image { diff --git a/mv_surveillance/surveillance/include/MFTracker.h b/mv_surveillance/surveillance/include/MFTracker.h index 4a58714..b93267a 100644 --- a/mv_surveillance/surveillance/include/MFTracker.h +++ b/mv_surveillance/surveillance/include/MFTracker.h @@ -18,6 +18,7 @@ #define __MEDIA_VISION_MFTRACKER_H__ #include +#include namespace mediavision { namespace surveillance { diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 70a2bc1..7bb6988 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.2 +Version: 0.12.3 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause -- 2.7.4 From da602b92861bfa60532443f68a2e46f6c43e9993 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Tue, 22 Feb 2022 15:30:50 +0900 Subject: [PATCH 14/16] Fix bugs [Version] 0.12.4-0 [Issue type] bug fix 1. Fix INVARIANT_RESULT.OP_ZERO - the return value of GetLandmarkType() is limited to the landmark type enumeration, so range-checking it is meaningless 2. Check for NULL after malloc() 3. Initialize member variable in the class ctor
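The malloc() checks added for item 2 follow the pattern sketched below (a sketch only; variable names are illustrative, while the error code and LOGE logging mirror the hunks that follow):

    size_t number_of_labels = ...; /* illustrative count */
    int *labels = (int *)malloc(sizeof(int) * number_of_labels);
    if (labels == NULL) {
        LOGE("Fail to alloc memory for %zu labels", number_of_labels);
        return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
    }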
Change-Id: I0e23298ffcb7760eb86e24e37347eae5e992e421 Signed-off-by: Tae-Young Chung --- mv_face/face/src/mv_face_open.cpp | 22 ++++++++++++---------- .../mv_inference/inference/src/PoseDecoder.cpp | 6 ------ packaging/capi-media-vision.spec | 2 +- test/testsuites/image/image_test_suite.c | 4 ++++ .../inference/test_inference_helper.cpp | 3 ++- .../surveillance/surveillance_test_suite.c | 4 ++++ 6 files changed, 23 insertions(+), 18 deletions(-) diff --git a/mv_face/face/src/mv_face_open.cpp b/mv_face/face/src/mv_face_open.cpp index 443fd71..fb4496b 100644 --- a/mv_face/face/src/mv_face_open.cpp +++ b/mv_face/face/src/mv_face_open.cpp @@ -694,19 +694,21 @@ int mv_face_recognition_model_query_labels_open( static_cast(recognition_model); const std::set& learnedLabels = pRecModel->getFaceLabels(); - *number_of_labels = learnedLabels.size(); - - if ((*number_of_labels)) { - (*labels) = (int*)malloc(sizeof(int) * (*number_of_labels)); - - std::set::const_iterator it = learnedLabels.begin(); - int i = 0; - for (; it != learnedLabels.end(); ++it) { - (*labels)[i] = *it; - ++i; + auto _number_of_labels = learnedLabels.size(); + int *_pLabels = NULL; + if (_number_of_labels) { + _pLabels = (int*)malloc(sizeof(int) * (_number_of_labels)); + if (_pLabels == NULL) { + LOGE("Fail to alloc memory for %zu labels", _number_of_labels); + return MEDIA_VISION_ERROR_OUT_OF_MEMORY; } + + std::copy(learnedLabels.begin(), learnedLabels.end(), _pLabels); } + *number_of_labels = _number_of_labels; + *labels = _pLabels; + LOGD("List of the labels learned by the recognition model has been retrieved"); return MEDIA_VISION_ERROR_NONE; } diff --git a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp index e1596aa..09c10d8 100644 --- a/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp +++ b/mv_machine_learning/mv_inference/inference/src/PoseDecoder.cpp @@ -50,12 +50,6 @@ namespace inference { LOGI("ENTER"); - if (mMeta.GetLandmarkType() < INFERENCE_LANDMARK_TYPE_2D_SINGLE || - mMeta.GetLandmarkType() > INFERENCE_LANDMARK_TYPE_3D_SINGLE) { - LOGE("Not supported landmark type"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS || mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS_MULTICHANNEL) { LOGI("Skip init"); diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 7bb6988..a8cd2a9 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.3 +Version: 0.12.4 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause diff --git a/test/testsuites/image/image_test_suite.c b/test/testsuites/image/image_test_suite.c index f1ff0de..a18ef5a 100644 --- a/test/testsuites/image/image_test_suite.c +++ b/test/testsuites/image/image_test_suite.c @@ -1742,6 +1742,10 @@ void perform_recognition_cases(GArray *image_objects) } mv_image_object_h *objects_pool = malloc(sizeof(mv_image_object_h) * image_objects->len); + if (objects_pool == NULL) { + printf("\nFail to alloc memory for %u objects.\n", image_objects->len); + break; + } int index = 0; for (; index < image_objects->len; ++index) objects_pool[index] = g_array_index(image_objects, testing_object_h, index)->entity; diff --git
a/test/testsuites/machine_learning/inference/test_inference_helper.cpp b/test/testsuites/machine_learning/inference/test_inference_helper.cpp index c041f48..ac427e6 100644 --- a/test/testsuites/machine_learning/inference/test_inference_helper.cpp +++ b/test/testsuites/machine_learning/inference/test_inference_helper.cpp @@ -2,7 +2,8 @@ #include #include "test_inference_helper.hpp" -TestInference::TestInference() +TestInference::TestInference() : + _use_json_parser(false) { EXPECT_EQ(mv_create_engine_config(&engine_cfg), MEDIA_VISION_ERROR_NONE); EXPECT_EQ(mv_inference_create(&infer), MEDIA_VISION_ERROR_NONE); diff --git a/test/testsuites/surveillance/surveillance_test_suite.c b/test/testsuites/surveillance/surveillance_test_suite.c index 0a86b98..5b7810c 100644 --- a/test/testsuites/surveillance/surveillance_test_suite.c +++ b/test/testsuites/surveillance/surveillance_test_suite.c @@ -454,6 +454,10 @@ void add_roi_to_event(mv_surveillance_event_trigger_h event_trigger) PRINT_R("Incorrect input! Try again."); mv_point_s* roi = (mv_point_s*) malloc(sizeof(mv_point_s) * number_of_roi_points); + if (roi == NULL) { + PRINT_E("Fail to alloc roi err[%d].", MEDIA_VISION_ERROR_OUT_OF_MEMORY); + return; + } int x = 0; int y = 0; -- 2.7.4 From f5c8d848e03d7a629520c762a2785de2a96238af Mon Sep 17 00:00:00 2001 From: sangho park Date: Tue, 29 Mar 2022 11:07:41 +0900 Subject: [PATCH 15/16] fix cmake_minimum_required() deprecation warning. [Version] : 0.12.5-0 [Issue type] : Bug fix Change-Id: I6b178d31b204ebdc38860bd82ed942867782ee0f Signed-off-by: sangho park --- CMakeLists.txt | 2 +- mv_barcode/barcode_detector/CMakeLists.txt | 2 +- mv_barcode/barcode_generator/CMakeLists.txt | 2 +- mv_common/CMakeLists.txt | 2 +- mv_face/face/CMakeLists.txt | 2 +- mv_image/CMakeLists.txt | 2 +- mv_image/image/CMakeLists.txt | 2 +- mv_machine_learning/mv_inference/inference/CMakeLists.txt | 2 +- mv_surveillance/surveillance/CMakeLists.txt | 2 +- packaging/capi-media-vision.spec | 2 +- test/CMakeLists.txt | 2 +- test/assessment/CMakeLists.txt | 2 +- test/assessment/barcode/CMakeLists.txt | 2 +- test/assessment/face/CMakeLists.txt | 2 +- test/assessment/surveillance/CMakeLists.txt | 2 +- test/testsuites/CMakeLists.txt | 2 +- test/testsuites/common/image_helper/CMakeLists.txt | 2 +- test/testsuites/common/testsuite_common/CMakeLists.txt | 2 +- test/testsuites/common/video_helper/CMakeLists.txt | 2 +- test/testsuites/face/CMakeLists.txt | 2 +- test/testsuites/image/CMakeLists.txt | 2 +- test/testsuites/machine_learning/CMakeLists.txt | 2 +- test/testsuites/machine_learning/inference/CMakeLists.txt | 2 +- test/testsuites/surveillance/CMakeLists.txt | 2 +- 24 files changed, 24 insertions(+), 24 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9421c35..fb5b84e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ set(fw_name "capi-media-vision") project(${fw_name}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) option(BUILD_ML_ONLY "Build mv_machine_learning only" OFF) diff --git a/mv_barcode/barcode_detector/CMakeLists.txt b/mv_barcode/barcode_detector/CMakeLists.txt index 8840abc..b67afb1 100644 --- a/mv_barcode/barcode_detector/CMakeLists.txt +++ b/mv_barcode/barcode_detector/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_BARCODE_DETECTOR_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB MV_BARCODE_DET_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/src/*.c") diff --git
a/mv_barcode/barcode_generator/CMakeLists.txt b/mv_barcode/barcode_generator/CMakeLists.txt index ca40696..099ae2c 100644 --- a/mv_barcode/barcode_generator/CMakeLists.txt +++ b/mv_barcode/barcode_generator/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_BARCODE_GENERATOR_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB MV_BARCODE_GEN_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/src/*.c") diff --git a/mv_common/CMakeLists.txt b/mv_common/CMakeLists.txt index 9fd4d67..b802adf 100644 --- a/mv_common/CMakeLists.txt +++ b/mv_common/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_COMMON_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB MV_COMMON_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/src/*.c") diff --git a/mv_face/face/CMakeLists.txt b/mv_face/face/CMakeLists.txt index 74c0dce..3c30f7c 100644 --- a/mv_face/face/CMakeLists.txt +++ b/mv_face/face/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_FACE_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB MV_FACE_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/src/*.c") diff --git a/mv_image/CMakeLists.txt b/mv_image/CMakeLists.txt index 2fe1e5d..12ea46f 100644 --- a/mv_image/CMakeLists.txt +++ b/mv_image/CMakeLists.txt @@ -1,4 +1,4 @@ project(mv_image_port) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_subdirectory(${PROJECT_SOURCE_DIR}/image) diff --git a/mv_image/image/CMakeLists.txt b/mv_image/image/CMakeLists.txt index e51e9a2..b0ff1c9 100644 --- a/mv_image/image/CMakeLists.txt +++ b/mv_image/image/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_IMAGE_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB_RECURSE MV_IMAGE_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/src/*.c") diff --git a/mv_machine_learning/mv_inference/inference/CMakeLists.txt b/mv_machine_learning/mv_inference/inference/CMakeLists.txt index cd20e57..41d6816 100644 --- a/mv_machine_learning/mv_inference/inference/CMakeLists.txt +++ b/mv_machine_learning/mv_inference/inference/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_INFERENCE_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) pkg_check_modules(${PROJECT_NAME}_DEP REQUIRED inference-engine-interface-common iniparser json-glib-1.0) file(GLOB MV_INFERENCE_SOURCE_LIST "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp") diff --git a/mv_surveillance/surveillance/CMakeLists.txt b/mv_surveillance/surveillance/CMakeLists.txt index 4b8c191..30c61f2 100644 --- a/mv_surveillance/surveillance/CMakeLists.txt +++ b/mv_surveillance/surveillance/CMakeLists.txt @@ -1,5 +1,5 @@ project(${MV_SURVEILLANCE_LIB_NAME}) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB MV_SURVEILLANCE_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/src/*.c") diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index a8cd2a9..07dd13d 100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.4 +Version: 0.12.5 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7008d14..b33a710 100644 ---
a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,7 +2,7 @@ add_subdirectory(assessment) add_subdirectory(testsuites) project(mv_test) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_executable(${PROJECT_NAME} testsuites/barcode/test_barcode.cpp diff --git a/test/assessment/CMakeLists.txt b/test/assessment/CMakeLists.txt index aae4038..0b70a03 100644 --- a/test/assessment/CMakeLists.txt +++ b/test/assessment/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_test_assessment) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_subdirectory(${PROJECT_SOURCE_DIR}/barcode) add_subdirectory(${PROJECT_SOURCE_DIR}/face) diff --git a/test/assessment/barcode/CMakeLists.txt b/test/assessment/barcode/CMakeLists.txt index fbcb2dc..e12f939 100644 --- a/test/assessment/barcode/CMakeLists.txt +++ b/test/assessment/barcode/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_barcode_assessment) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_executable(${PROJECT_NAME} assessment_barcode.cpp) target_link_libraries(${PROJECT_NAME} ${MV_BARCODE_DETECTOR_LIB_NAME} diff --git a/test/assessment/face/CMakeLists.txt b/test/assessment/face/CMakeLists.txt index cf203b9..464acb6 100644 --- a/test/assessment/face/CMakeLists.txt +++ b/test/assessment/face/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_face_assessment) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) find_package(OpenCV REQUIRED imgcodecs) if(NOT OpenCV_FOUND) diff --git a/test/assessment/surveillance/CMakeLists.txt b/test/assessment/surveillance/CMakeLists.txt index c7a30bd..7fa3163 100644 --- a/test/assessment/surveillance/CMakeLists.txt +++ b/test/assessment/surveillance/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) project(mv_surveillance_assessment) find_package(OpenCV REQUIRED videoio xfeatures2d) diff --git a/test/testsuites/CMakeLists.txt b/test/testsuites/CMakeLists.txt index bc195d7..1ea5af5 100644 --- a/test/testsuites/CMakeLists.txt +++ b/test/testsuites/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_testsuites) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) set(INC_IMAGE_HELPER "${PROJECT_SOURCE_DIR}/common/image_helper/include") set(INC_VIDEO_HELPER "${PROJECT_SOURCE_DIR}/common/video_helper") diff --git a/test/testsuites/common/image_helper/CMakeLists.txt b/test/testsuites/common/image_helper/CMakeLists.txt index be4ec3f..4a8a008 100644 --- a/test/testsuites/common/image_helper/CMakeLists.txt +++ b/test/testsuites/common/image_helper/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_image_helper) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) file(GLOB MV_IMAGE_HELPER_SRC_LIST "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp") diff --git a/test/testsuites/common/testsuite_common/CMakeLists.txt b/test/testsuites/common/testsuite_common/CMakeLists.txt index a7ca583..f93c508 100644 --- a/test/testsuites/common/testsuite_common/CMakeLists.txt +++ b/test/testsuites/common/testsuite_common/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_testsuite_common) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_library(${PROJECT_NAME} SHARED mv_testsuite_common.c) target_link_libraries(${PROJECT_NAME} ${MV_COMMON_LIB_NAME} mv_image_helper) diff --git a/test/testsuites/common/video_helper/CMakeLists.txt b/test/testsuites/common/video_helper/CMakeLists.txt index
22296ee..43331fb 100644 --- a/test/testsuites/common/video_helper/CMakeLists.txt +++ b/test/testsuites/common/video_helper/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_video_helper) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) pkg_check_modules(${PROJECT_NAME}_DEP REQUIRED gstreamer-1.0 gstreamer-app-1.0 gstreamer-video-1.0 capi-media-tool dlog) diff --git a/test/testsuites/face/CMakeLists.txt b/test/testsuites/face/CMakeLists.txt index ce57d1d..2e111e7 100644 --- a/test/testsuites/face/CMakeLists.txt +++ b/test/testsuites/face/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_face_test_suite) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_executable(${PROJECT_NAME} face_test_suite.c) target_link_libraries(${PROJECT_NAME} ${MV_FACE_LIB_NAME} diff --git a/test/testsuites/image/CMakeLists.txt b/test/testsuites/image/CMakeLists.txt index aa2ecfd..6b20bce 100644 --- a/test/testsuites/image/CMakeLists.txt +++ b/test/testsuites/image/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_image_test_suite) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) pkg_check_modules(GLIB_PKG glib-2.0) diff --git a/test/testsuites/machine_learning/CMakeLists.txt b/test/testsuites/machine_learning/CMakeLists.txt index 57785be..5b21eb4 100644 --- a/test/testsuites/machine_learning/CMakeLists.txt +++ b/test/testsuites/machine_learning/CMakeLists.txt @@ -1,4 +1,4 @@ project(machine_learning) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_subdirectory(${PROJECT_SOURCE_DIR}/inference) diff --git a/test/testsuites/machine_learning/inference/CMakeLists.txt b/test/testsuites/machine_learning/inference/CMakeLists.txt index cd480a7..0aabbe4 100644 --- a/test/testsuites/machine_learning/inference/CMakeLists.txt +++ b/test/testsuites/machine_learning/inference/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_infer_test_suite) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_executable(${PROJECT_NAME} inference_test_suite.c) diff --git a/test/testsuites/surveillance/CMakeLists.txt b/test/testsuites/surveillance/CMakeLists.txt index d87d645..1d731a7 100644 --- a/test/testsuites/surveillance/CMakeLists.txt +++ b/test/testsuites/surveillance/CMakeLists.txt @@ -1,5 +1,5 @@ project(mv_surveillance_test_suite) -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.6...3.13) add_executable(${PROJECT_NAME} surveillance_test_suite.c) target_link_libraries(${PROJECT_NAME} ${MV_SURVEILLANCE_LIB_NAME} -- 2.7.4 From 2728d7dd19a92dc67eac29255935086da314b275 Mon Sep 17 00:00:00 2001 From: sangho park Date: Mon, 9 May 2022 14:53:43 +0900 Subject: [PATCH 16/16] clean up manual file copy [Version] 0.12.6-0 [Issue type] cleanup Delete the manual copy commands from the spec file and implement the install step in CMakeLists.txt instead. Change-Id: I75841678480972f2a270b84db6fc533e006e1d5d Signed-off-by: sangho park --- CMakeLists.txt | 2 ++ packaging/capi-media-vision.spec | 6 +----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fb5b84e..56672ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -123,3 +123,5 @@ configure_file( @ONLY ) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-inference.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig) + +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/media-vision-config.json DESTINATION ${CMAKE_INSTALL_DATADIR}/${fw_name}) diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec index 07dd13d..22b0ee5
100644 --- a/packaging/capi-media-vision.spec +++ b/packaging/capi-media-vision.spec @@ -1,6 +1,6 @@ Name: capi-media-vision Summary: Media Vision library for Tizen Native API -Version: 0.12.5 +Version: 0.12.6 Release: 0 Group: Multimedia/Framework License: Apache-2.0 and BSD-3-Clause @@ -222,10 +222,6 @@ find . -name '*.gcno' -exec cp '{}' gcov-obj ';' %endif %install -rm -rf %{buildroot} -mkdir -p %{buildroot}%{_datadir}/%{name} -cp media-vision-config.json %{buildroot}%{_datadir}/%{name}/ - %make_install %if 0%{?gcov:1} -- 2.7.4