From 3f9233f26ef8b41162391e7049427931f5f2bf0d Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Wed, 17 Mar 2021 16:07:36 +0900 Subject: [PATCH] Add Metadata, PreProcess, PostProcess class Introduce metadata based pre-/post-process. User can set the input/output information of a model to a metadata file(json file) Based on information, input tensor and output tensor are preprocessed and postprocessed, respectively. Metadata class with InputMetadata and OutputMetadata are added. PreProcess and PostProcess class are added. A meta file for classification is added as an example. It can be used as an referece for classification case. Change-Id: Ie86b50ae32bf2f7f5d336fe4709d6ea938930c3d Signed-off-by: Tae-Young Chung --- meta-template/README.md | 68 ++++++ meta-template/image-classification-001-meta.json | 36 +++ mv_inference/inference/CMakeLists.txt | 4 +- mv_inference/inference/include/Inference.h | 14 ++ mv_inference/inference/include/InputMetadata.h | 126 ++++++++++ mv_inference/inference/include/Metadata.h | 90 +++++++ mv_inference/inference/include/OutputMetadata.h | 93 ++++++++ mv_inference/inference/include/PostProcess.h | 82 +++++++ mv_inference/inference/include/PreProcess.h | 77 ++++++ mv_inference/inference/src/Inference.cpp | 290 ++++++++++++++++------- mv_inference/inference/src/InputMetadata.cpp | 234 ++++++++++++++++++ mv_inference/inference/src/Metadata.cpp | 121 ++++++++++ mv_inference/inference/src/OutputMetadata.cpp | 112 +++++++++ mv_inference/inference/src/PostProcess.cpp | 84 +++++++ mv_inference/inference/src/PreProcess.cpp | 130 ++++++++++ mv_inference/inference/src/mv_inference_open.cpp | 13 + packaging/capi-media-vision.spec | 2 +- 17 files changed, 1492 insertions(+), 84 deletions(-) create mode 100644 meta-template/README.md create mode 100644 meta-template/image-classification-001-meta.json create mode 100644 mv_inference/inference/include/InputMetadata.h create mode 100644 mv_inference/inference/include/Metadata.h create mode 100644 
mv_inference/inference/include/OutputMetadata.h create mode 100644 mv_inference/inference/include/PostProcess.h create mode 100644 mv_inference/inference/include/PreProcess.h create mode 100644 mv_inference/inference/src/InputMetadata.cpp create mode 100644 mv_inference/inference/src/Metadata.cpp create mode 100644 mv_inference/inference/src/OutputMetadata.cpp create mode 100644 mv_inference/inference/src/PostProcess.cpp create mode 100644 mv_inference/inference/src/PreProcess.cpp diff --git a/meta-template/README.md b/meta-template/README.md new file mode 100644 index 0000000..3d5d69a --- /dev/null +++ b/meta-template/README.md @@ -0,0 +1,68 @@ +Introduce Meta file based Inference. + +A Meta file provides a model description. The Meta file is an important source of knowledge about how to run models while properly applying preprocess to input and postprocess to output. + +The Meta file consists of 1) inputmetadata and 2) outputmetadata. For example, a meta file for classification is shown below. + +``` +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input_2", + "shape_type" : 1, + "shape_dims" : [ 1, 224, 224, 3], + "data_type" : 0, + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [127.5, 127.5, 127.5], + "std" : [127.5, 127.5, 127.5] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "dense_3/Softmax", + "index" : [-1, 1], + "top_number" : 5, + "threshold" : 0.3, + "score_type" : 0 + } + ] + } +} +``` + +In the classification meta file, the `inputmetadata` includes +`tensor_info` which has knowledge of an input tensor such as +- `name`: name to an input tensor +- `shape_type`: shape type of the input tensor on `NCHW = 0` and `NHWC = 1` +- `shape_dims`: shape dimensions based on the `shape_type` +- `data_type`: data type of the input tensor on `MV_INFERENCE_DATA_FLOAT32 = 0` and `MV_INFERENCE_DATA_UINT8 = 1` +- `color_space`: color space of the input tensor. 
+
+`preprocess` which has information of preprocess such as
+- `normalization`: nVal = (fVal - `mean`)/`std`, where nVal is the normalized value and fVal is the input value
+ - `mean`: mean values of the input tensor
+ - `std` : standard deviation values of the input tensor
+
+The `outputmetadata` includes
+`score` which has information of postprocess to get score such as
+- `name`: name of an output tensor for score
+- `index`: index to get score from the output tensor
+- `top_number`: the top number of outputs
+- `threshold` : threshold to cut outputs under the `threshold` value
+- `score_type` : 0 if the score is already between 0 ~ 1, 1 if the score requires sigmoid
+
+The classification meta file, thus, illustrates that the model has an input which is named `input_2`, with `NHWC` shape type, `[1, 224, 224, 3]` dimensions, `MV_INFERENCE_DATA_FLOAT32` data type, and `RGB888` color space. It requires normalization with mean `[127.5, 127.5, 127.5]` and standard deviation `[127.5, 127.5, 127.5]`. But it doesn't apply quantization.
+The meta file illustrates that the model has an output named `dense_3/Softmax`. The tensor is 2-dimensional and its 2nd index corresponds to the score. In addition, the score is already between 0 ~ 1. Any score under `threshold` 0.3 should be thrown out and the `top_number` of outputs should be given as results.
diff --git a/meta-template/image-classification-001-meta.json b/meta-template/image-classification-001-meta.json new file mode 100644 index 0000000..5cb7bd5 --- /dev/null +++ b/meta-template/image-classification-001-meta.json @@ -0,0 +1,36 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input_2", + "shape_type" : 1, + "shape_dims" : [ 1, 224, 224, 3], + "data_type" : 0, + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [127.5, 127.5, 127.5], + "std" : [127.5, 127.5, 127.5] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "dense_3/Softmax", + "index" : [-1, 1], + "top_number" : 5, + "threshold" : 0.3, + "score_type" : 0 + } + ] + } +} diff --git a/mv_inference/inference/CMakeLists.txt b/mv_inference/inference/CMakeLists.txt index c7f4903..05de57c 100644 --- a/mv_inference/inference/CMakeLists.txt +++ b/mv_inference/inference/CMakeLists.txt @@ -11,7 +11,7 @@ INCLUDE_DIRECTORIES("${INC_DIR}") INCLUDE_DIRECTORIES("${PROJECT_SOURCE_DIR}/include") INCLUDE_DIRECTORIES("${PROJECT_SOURCE_DIR}/src") -SET(dependents "inference-engine-interface-common iniparser") +SET(dependents "inference-engine-interface-common iniparser json-glib-1.0") INCLUDE(FindPkgConfig) pkg_check_modules(${fw_name} REQUIRED ${dependents}) FOREACH(flag ${${fw_name}_CFLAGS}) @@ -39,6 +39,6 @@ else() ADD_LIBRARY(${PROJECT_NAME} SHARED ${MV_INFERENCE_INCLUDE_LIST} ${MV_INFERENCE_SOURCE_LIST}) endif() -TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${MV_COMMON_LIB_NAME} ${OpenCV_LIBS} inference-engine-interface-common dlog iniparser) +TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${MV_COMMON_LIB_NAME} ${OpenCV_LIBS} inference-engine-interface-common dlog iniparser json-glib-1.0) INSTALL(TARGETS ${PROJECT_NAME} DESTINATION ${LIB_INSTALL_DIR}) diff --git a/mv_inference/inference/include/Inference.h b/mv_inference/inference/include/Inference.h index 997e476..c86e7e1 100644 --- a/mv_inference/inference/include/Inference.h +++ 
b/mv_inference/inference/include/Inference.h @@ -27,6 +27,9 @@ #include #include #include +#include "Metadata.h" +#include "PreProcess.h" +#include "PostProcess.h" #define HUMAN_POSE_MAX_LANDMARKS 16 #define HUMAN_POSE_MAX_PARTS 6 @@ -201,6 +204,13 @@ namespace inference void ConfigureThreshold(const double threshold); /** + * @brief Parses the metadata file path + * + * @since_tizen 6.5 + */ + int ParseMetadata(const std::string filePath); + + /** * @brief Bind a backend engine * @details Use this function to bind a backend engine for the inference. * This creates a inference engine common class object, and loads a backend @@ -352,6 +362,10 @@ namespace inference mv_inference_pose_s *mPoseResult; + Metadata mMetadata; + PreProcess mPreProc; + PostProcess mPostProc; + private: void CheckSupportedInferenceBackend(); int ConvertEngineErrorToVisionError(int error); diff --git a/mv_inference/inference/include/InputMetadata.h b/mv_inference/inference/include/InputMetadata.h new file mode 100644 index 0000000..8b722c7 --- /dev/null +++ b/mv_inference/inference/include/InputMetadata.h @@ -0,0 +1,126 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MEDIA_VISION_INPUTMETADATA_H__ +#define __MEDIA_VISION_INPUTMETADATA_H__ + +#include +#include + +#include +#include +#include + +/** + * @file InputMetadata.h + * @brief This file contains the metadata class definition which + * provides metadata of a model. + */ + +namespace mediavision +{ +namespace inference +{ + class Options + { + public: + class Normalization + { + public: + bool use; + std::vector mean; + std::vector std; + + Normalization() : use(false) {} + ~Normalization() = default; + }; + + class Quantization + { + public: + bool use; + std::vector scale; + std::vector zeropoint; + + Quantization() : use(false) {}; + ~Quantization() = default; + }; + + Normalization normalization; + Quantization quantization; + + Options() = default; + ~Options() = default; + }; + + class LayerInfo + { + public: + + std::string name; + std::vector dims; + mv_colorspace_e colorSpace; + mv_inference_data_type_e dataType; + inference_tensor_shape_type_e shapeType; // TODO: define mv_inference_shape_type_e + + LayerInfo() = default; + ~LayerInfo() = default; + + int GetWidth() const; + int GetHeight() const; + int GetChannel() const; + }; + + class InputMetadata + { + public: + bool parsed; + std::map layer; + std::map option; + + /** + * @brief Creates an InputMetadata class instance. + * + * @since_tizen 6.5 + */ + InputMetadata() : parsed(false) {}; + + /** + * @brief Destroys an InputMetadata class instance including + * its all resources. 
+ * + * @since_tizen 6.5 + */ + ~InputMetadata() = default; + + /** + * @brief Parses an InputMetadata + * + * @since_tizen 6.5 + */ + int Parse(JsonObject *root); + + private: + int GetTensorInfo(JsonObject* root); + int GetPreProcess(JsonObject* root); + mv_colorspace_e ConvertTypeToMD(const std::string& type); + + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_INPUTMETADATA_H__ */ diff --git a/mv_inference/inference/include/Metadata.h b/mv_inference/inference/include/Metadata.h new file mode 100644 index 0000000..322fd3e --- /dev/null +++ b/mv_inference/inference/include/Metadata.h @@ -0,0 +1,90 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_METADATA_H__ +#define __MEDIA_VISION_METADATA_H__ + +#include +#include + +#include "mv_common.h" +#include "mv_inference_private.h" +#include +#include "InputMetadata.h" +#include "OutputMetadata.h" +#include + +/** + * @file Metadata.h + * @brief This file contains the metadata class definition which + * provides metadata of a model. + */ + +namespace mediavision +{ +namespace inference +{ + class Metadata + { + public: + /** + * @brief Creates an Metadata class instance. + * + * @since_tizen 6.5 + */ + Metadata() = default; + + /** + * @brief Destroys an Metadata class instance including + * its all resources. 
+ * + * @since_tizen 6.5 + */ + ~Metadata() = default; + + /** + * @brief Initializes an Metadata class + * + * @since_tizen 6.5 + */ + int Init(const std::string& filename); + + /** + * @brief Parses a metafile and set values to InputMetadata + * and OutputMetadata + * + * @since_tizen 6.5 + */ + int Parse(); + + const InputMetadata& GetInputMeta(); + const OutputMetadata& GetOutputMeta(); + + private: + int ParseInputMeta(JsonObject *object); + int ParseOutputMeta(JsonObject *object); + + private: + std::string mMetafile; + + InputMetadata mInputMeta; + OutputMetadata mOutputMeta; + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_METADATA_H__ */ diff --git a/mv_inference/inference/include/OutputMetadata.h b/mv_inference/inference/include/OutputMetadata.h new file mode 100644 index 0000000..b687917 --- /dev/null +++ b/mv_inference/inference/include/OutputMetadata.h @@ -0,0 +1,93 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_OUTPUTMETADATA_H__ +#define __MEDIA_VISION_OUTPUTMETADATA_H__ + +#include +#include +#include + +#include +#include + +/** + * @file OutputMetadata.h + * @brief This file contains the metadata class definition which + * provides metadata of a model. 
+ */ + +namespace mediavision +{ +namespace inference +{ + class DimInfo + { + public: + std::vector index; + }; + + class ScoreInfo + { + public: + std::string name; + DimInfo dimInfo; + double threshold; + int type; + int topNumber; + + public: + ScoreInfo() = default; + ~ScoreInfo() = default; + int GetIndex() const; + }; + + class OutputMetadata + { + public: + bool parsed; + ScoreInfo score; + + /** + * @brief Creates an OutputMetadata class instance. + * + * @since_tizen 6.5 + */ + OutputMetadata() : parsed(false) {}; + + /** + * @brief Destroys an OutputMetadata class instance including + * its all resources. + * + * @since_tizen 6.5 + */ + ~OutputMetadata() = default; + + /** @brief Parses an OutputMetadata + * + * @since_tizen 6.5 + */ + int Parse(JsonObject *root); + + private: + int GetScore(JsonObject *root); + + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_OUTPUTMETADATA_H__ */ diff --git a/mv_inference/inference/include/PostProcess.h b/mv_inference/inference/include/PostProcess.h new file mode 100644 index 0000000..9f77a9f --- /dev/null +++ b/mv_inference/inference/include/PostProcess.h @@ -0,0 +1,82 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MEDIA_VISION_POSTPROCESS_H__ +#define __MEDIA_VISION_POSTPROCESS_H__ + +#include +#include +#include + +#include "mv_common.h" +#include "OutputMetadata.h" + +#include +#include + + +/** + * @file PostProcess.h + * @brief This file contains the PostProcess class definition which + * provides PostProcess after running inference. + */ + +namespace mediavision +{ +namespace inference +{ + class PostProcess + { + public: + /** + * @brief Creates an PostProcess class instance. + * + * @since_tizen 6.5 + */ + PostProcess() : mMaxScoreSize(3) {}; + + /** + * @brief Destroys an PostProcess class instance including + * its all resources. + * + * @since_tizen 6.5 + */ + ~PostProcess() = default; + + /** + * @brief Calculates sigmoid. + * + * @since_tizen 6.5 + */ + static float sigmoid(float value); + + int ScoreClear(int size); + int ScorePush(float value, int index); + int ScorePop(std::vector>& top); + + private: + std::priority_queue, + std::vector>, + std::greater>> mScore; + private: + int mMaxScoreSize; + + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_POSTPROCESS_H__ */ diff --git a/mv_inference/inference/include/PreProcess.h b/mv_inference/inference/include/PreProcess.h new file mode 100644 index 0000000..f4c002b --- /dev/null +++ b/mv_inference/inference/include/PreProcess.h @@ -0,0 +1,77 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_PREPROCESS_H__ +#define __MEDIA_VISION_PREPORCESS_H__ + +#include +#include + +#include "mv_common.h" +#include "InputMetadata.h" + +#include +#include + + +/** + * @file PreProcess.h + * @brief This file contains the PreProcess class definition which + * provides PreProcess before running inference. + */ + +namespace mediavision +{ +namespace inference +{ + class PreProcess + { + public: + /** + * @brief Creates an PreProcess class instance. + * + * @since_tizen 6.5 + */ + PreProcess() = default; + + /** + * @brief Destroys an PreProcess class instance including + * its all resources. + * + * @since_tizen 6.5 + */ + ~PreProcess() = default; + + /** + * @brief Runs PreProcess with layerInfo and options + * + * @since_tizen 6.5 + */ + int Run(cv::Mat& source, const int colorSpace, const int dataType, const LayerInfo& layerInfo, + const Options& options, void* buffer); + + private: + int Resize(cv::Mat& source, cv::Mat& dest, cv::Size size); + int ColorConvert(cv::Mat& source, cv::Mat& dest, int sType, int dType); + int Normalize(cv::Mat& source, cv::Mat& dest, + const std::vector& mean, const std::vector& std); + + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_PREPROCESS_H__ */ diff --git a/mv_inference/inference/src/Inference.cpp b/mv_inference/inference/src/Inference.cpp index fa1c5c5..48acde8 100644 --- a/mv_inference/inference/src/Inference.cpp +++ b/mv_inference/inference/src/Inference.cpp @@ -79,7 +79,10 @@ namespace inference mInputBuffer(cv::Mat()), engine_config(), mBackend(), - mPoseResult(NULL) + mPoseResult(NULL), + mMetadata(), + mPreProc(), + mPostProc() { LOGI("ENTER"); @@ -375,35 +378,77 @@ namespace inference { LOGI("ENTER"); + // FIXME: mConfig should be removed mConfig.mTensorInfo = { width, height, dim, ch }; mConfig.mStdValue = stdValue; mConfig.mMeanValue = meanValue; 
mConfig.mDataType = static_cast(dataType); mConfig.mInputLayerNames = names; + const InputMetadata& inputMeta = mMetadata.GetInputMeta(); + if (inputMeta.parsed) { + LOGI("use input meta"); + auto& layerInfo = inputMeta.layer.begin()->second; + auto& option = inputMeta.option.begin()->second; + if (layerInfo.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { // NCHW + mConfig.mTensorInfo.ch = layerInfo.dims[1]; + mConfig.mTensorInfo.dim = layerInfo.dims[0]; + mConfig.mTensorInfo.width = layerInfo.dims[3]; + mConfig.mTensorInfo.height = layerInfo.dims[2]; + } else if (layerInfo.shapeType == INFERENCE_TENSOR_SHAPE_NHWC) {// NHWC + mConfig.mTensorInfo.ch = layerInfo.dims[3]; + mConfig.mTensorInfo.dim = layerInfo.dims[0]; + mConfig.mTensorInfo.width = layerInfo.dims[2]; + mConfig.mTensorInfo.height = layerInfo.dims[1]; + } else { + LOGE("Invalid shape type[%d]", layerInfo.shapeType); + } + + if (option.normalization.use) { + mConfig.mMeanValue = option.normalization.mean[0]; + mConfig.mStdValue = option.normalization.std[0]; + } + + mConfig.mDataType = layerInfo.dataType; + mConfig.mInputLayerNames.clear(); + for (auto& layer : inputMeta.layer) { + mConfig.mInputLayerNames.push_back(layer.first); + } + } + inference_engine_layer_property property; // In case of that a inference plugin deosn't support to get properties, // the tensor info given by a user will be used. // If the plugin supports that, the given info will be ignored. 
- inference_engine_tensor_info tensor_info; - - tensor_info.data_type = ConvertToIE(dataType); - - // In case of OpenCV, only supports NCHW - tensor_info.shape_type = INFERENCE_TENSOR_SHAPE_NCHW; - // modify to handle multiple tensor infos - tensor_info.shape.push_back(mConfig.mTensorInfo.dim); - tensor_info.shape.push_back(mConfig.mTensorInfo.ch); - tensor_info.shape.push_back(mConfig.mTensorInfo.height); - tensor_info.shape.push_back(mConfig.mTensorInfo.width); - tensor_info.size = 1; - for (auto& dim : tensor_info.shape) { - tensor_info.size *= dim; - } + for (auto& layer : inputMeta.layer) { + inference_engine_tensor_info tensor_info; + if (inputMeta.parsed) { + tensor_info.data_type = ConvertToIE(layer.second.dataType); - for (auto& layerName : mConfig.mInputLayerNames) { - property.layers.insert(std::make_pair(layerName, tensor_info)); + tensor_info.shape_type = layer.second.shapeType; + tensor_info.size = 1; + for (auto& dim : layer.second.dims) { + tensor_info.shape.push_back(dim); + tensor_info.size *= dim; + } + } else { + tensor_info.data_type = ConvertToIE(dataType); + + // In case of OpenCV, only supports NCHW + tensor_info.shape_type = INFERENCE_TENSOR_SHAPE_NCHW; + // modify to handle multiple tensor infos + tensor_info.shape.push_back(mConfig.mTensorInfo.dim); + tensor_info.shape.push_back(mConfig.mTensorInfo.ch); + tensor_info.shape.push_back(mConfig.mTensorInfo.height); + tensor_info.shape.push_back(mConfig.mTensorInfo.width); + + tensor_info.size = 1; + for (auto& dim : tensor_info.shape) { + tensor_info.size *= dim; + } + } + property.layers.insert(std::make_pair(layer.first, tensor_info)); } int ret = mBackend->SetInputLayerProperty(property); @@ -542,6 +587,26 @@ namespace inference MV_INFERENCE_CONFIDENCE_THRESHOLD_MIN); } + int Inference::ParseMetadata(const std::string filePath) + { + LOGI("ENTER"); + int ret = mMetadata.Init(filePath); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to init metadata[%d]", ret); + return ret; + } + + 
ret = mMetadata.Parse(); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to parse metadata[%d]", ret); + return ret; + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + void Inference::CleanupTensorBuffers(void) { LOGI("ENTER"); @@ -1066,19 +1131,33 @@ namespace inference return MEDIA_VISION_ERROR_INVALID_PARAMETER; } - for (auto& buffer : mInputTensorBuffers) { - inference_engine_tensor_buffer& tensor_buffer = buffer.second; + const InputMetadata& inputMeta = mMetadata.GetInputMeta(); + if (inputMeta.parsed) { + for (auto& buffer : mInputTensorBuffers) { + inference_engine_tensor_buffer& tensor_buffer = buffer.second; + const LayerInfo& layerInfo = inputMeta.layer.at(buffer.first); + const Options& opt = inputMeta.option.at(buffer.first); - int data_type = ConvertToCv(tensor_buffer.data_type); + int data_type = ConvertToCv(tensor_buffer.data_type); - // Convert color space of input tensor data and then normalize it. - ret = Preprocess(cvSource, - cv::Mat(mInputSize.height, mInputSize.width, - data_type, tensor_buffer.buffer), - data_type); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to preprocess input tensor data."); - return ret; + ret = mPreProc.Run(cvSource, colorspace, data_type, layerInfo, opt, tensor_buffer.buffer); + } + } else { + for (auto& buffer : mInputTensorBuffers) { + inference_engine_tensor_buffer& tensor_buffer = buffer.second; + + int data_type = ConvertToCv(tensor_buffer.data_type); + + // Convert color space of input tensor data and then normalize it. 
+ + ret = Preprocess(cvSource, + cv::Mat(mInputSize.height, mInputSize.width, + data_type, tensor_buffer.buffer), + data_type); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to preprocess input tensor data."); + return ret; + } } } @@ -1096,71 +1175,120 @@ namespace inference int Inference::GetClassficationResults( ImageClassificationResults *classificationResults) { - tensor_t outputData; + const OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + if (outputMeta.parsed) { + std::vector> topScore; + float value = 0.0f; + auto& info = outputMeta.score; + + int index = info.GetIndex(); + int classes = mOutputLayerProperty.layers[info.name].shape[index]; + float *output = static_cast(mOutputTensorBuffers[info.name].buffer); + if (output == NULL) { + LOGE("output buffe is NULL"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } - // Get inference result and contain it to outputData. - int ret = FillOutputResult(outputData); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get output result."); - return ret; - } + mPostProc.ScoreClear(info.topNumber); + for (int cId = 0; cId < classes; ++cId) { + value = output[cId]; - // Will contain top N results in ascending order. 
- std::vector > top_results; - std::priority_queue, - std::vector >, - std::greater > > - top_result_pq; - float value = 0.0f; + if (info.type == 1) { + value = PostProcess::sigmoid(value); + } - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); + if (value < info.threshold) + continue; - int count = inferDimInfo[0][1]; - LOGI("count: %d", count); + LOGI("id[%d]: %.3f", cId, value); + mPostProc.ScorePush(value, cId); + } + mPostProc.ScorePop(topScore); + + ImageClassificationResults results; + results.number_of_classes = 0; + for (auto& value : topScore) { + LOGI("score: %.3f, threshold: %.3f", value.first, info.threshold); + LOGI("idx:%d", value.second); + LOGI("classProb: %.3f", value.first); + + results.indices.push_back(value.second); + results.confidences.push_back(value.first); + results.names.push_back(mUserListName[value.second]); + results.number_of_classes++; + } - float *prediction = reinterpret_cast(inferResults[0]); - for (int i = 0; i < count; ++i) { - value = prediction[i]; + *classificationResults = results; + LOGE("Inference: GetClassificationResults: %d\n", + results.number_of_classes); - // Only add it if it beats the threshold and has a chance at being in - // the top N. - top_result_pq.push(std::pair(value, i)); + } else { + tensor_t outputData; - // If at capacity, kick the smallest value out. - if (top_result_pq.size() > mOutputNumbers) { - top_result_pq.pop(); + // Get inference result and contain it to outputData. + int ret = FillOutputResult(outputData); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get output result."); + return ret; } - } - // Copy to output vector and reverse into descending order. - while (!top_result_pq.empty()) { - top_results.push_back(top_result_pq.top()); - top_result_pq.pop(); - } - std::reverse(top_results.begin(), top_results.end()); + // Will contain top N results in ascending order. 
+ std::vector > top_results; + std::priority_queue, + std::vector >, + std::greater > > + top_result_pq; + float value = 0.0f; + + std::vector > inferDimInfo(outputData.dimInfo); + std::vector inferResults(outputData.data.begin(), + outputData.data.end()); + + int count = inferDimInfo[0][1]; + LOGI("count: %d", count); + float *prediction = reinterpret_cast(inferResults[0]); + for (int i = 0; i < count; ++i) { + value = prediction[i]; + + // Only add it if it beats the threshold and has a chance at being in + // the top N. + top_result_pq.push(std::pair(value, i)); + + // If at capacity, kick the smallest value out. + if (top_result_pq.size() > mOutputNumbers) { + top_result_pq.pop(); + } + } - int classIdx = -1; - ImageClassificationResults results; - results.number_of_classes = 0; - for (int idx = 0; idx < top_results.size(); ++idx) { - if (top_results[idx].first < mThreshold) - continue; - LOGI("idx:%d", idx); - LOGI("classIdx: %d", top_results[idx].second); - LOGI("classProb: %f", top_results[idx].first); + // Copy to output vector and reverse into descending order. 
+ while (!top_result_pq.empty()) { + top_results.push_back(top_result_pq.top()); + top_result_pq.pop(); + } + std::reverse(top_results.begin(), top_results.end()); + + int classIdx = -1; + ImageClassificationResults results; + results.number_of_classes = 0; + for (int idx = 0; idx < top_results.size(); ++idx) { + if (top_results[idx].first < mThreshold) + continue; + LOGI("idx:%d", idx); + LOGI("classIdx: %d", top_results[idx].second); + LOGI("classProb: %f", top_results[idx].first); + + classIdx = top_results[idx].second; + results.indices.push_back(classIdx); + results.confidences.push_back(top_results[idx].first); + results.names.push_back(mUserListName[classIdx]); + results.number_of_classes++; + } - classIdx = top_results[idx].second; - results.indices.push_back(classIdx); - results.confidences.push_back(top_results[idx].first); - results.names.push_back(mUserListName[classIdx]); - results.number_of_classes++; + *classificationResults = results; + LOGE("Inference: GetClassificationResults: %d\n", + results.number_of_classes); } - *classificationResults = results; - LOGE("Inference: GetClassificationResults: %d\n", - results.number_of_classes); return MEDIA_VISION_ERROR_NONE; } diff --git a/mv_inference/inference/src/InputMetadata.cpp b/mv_inference/inference/src/InputMetadata.cpp new file mode 100644 index 0000000..4084328 --- /dev/null +++ b/mv_inference/inference/src/InputMetadata.cpp @@ -0,0 +1,234 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mv_private.h" + +#include +#include +#include +#include +#include +#include "InputMetadata.h" +#include + +namespace mediavision +{ +namespace inference +{ + mv_colorspace_e InputMetadata::ConvertTypeToMD(const std::string& type) + { + mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID; + if (type.empty()) { + LOGE("Invalid type[null]"); + return colorspace; + } + + if (type.compare("RGB888") == 0) { + colorspace = MEDIA_VISION_COLORSPACE_RGB888; + } else if (type.compare("Y800") == 0) { + colorspace = MEDIA_VISION_COLORSPACE_Y800; + } else { + LOGE("Not supported channel type"); + } + + return colorspace; + } + + int InputMetadata::GetTensorInfo(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "tensor_info") == false) { + LOGE("No tensor_info inputmetadata"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + // tensor_info + JsonArray * rootArray = json_object_get_array_member(root, "tensor_info"); + unsigned int elements = json_array_get_length(rootArray); + + std::map().swap(layer); + // TODO: handling error + // FIXEME: LayerInfo.set()?? + for (unsigned int elem = 0; elem < elements; ++elem) { + LayerInfo info; + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + info.name = + static_cast(json_object_get_string_member(pObject,"name")); + LOGI("layer: %s", info.name.c_str()); + + info.shapeType = + static_cast(json_object_get_int_member(pObject, "shape_type")); + LOGI("shape type: %d:%s", info.shapeType, info.shapeType == 0 ? "NCHW" : "NHWC"); + + info.dataType = + static_cast(json_object_get_int_member(pObject, "data_type")); + LOGI("data type : %d:%s", info.dataType, info.dataType == 0 ? 
"FLOAT32" : "UINT8"); + + const char *colorSpace = static_cast(json_object_get_string_member(pObject,"color_space")); + info.colorSpace = ConvertTypeToMD(std::string(colorSpace)); + LOGI("color space : %d:%s", info.colorSpace, info.colorSpace == MEDIA_VISION_COLORSPACE_RGB888 ? "RGB888" : ""); + + // dims + JsonArray * array = json_object_get_array_member(pObject, "shape_dims"); + unsigned int elements2 = json_array_get_length(array); + LOGI("shape dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + auto dim = static_cast(json_array_get_int_element(array, elem2)); + info.dims.push_back(dim); + LOGI("%d", dim); + } + + layer.insert(std::make_pair(info.name, info)); + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int InputMetadata::GetPreProcess(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "preprocess") == false) { + LOGE("No preprocess inputmetadata"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + // preprocess + JsonArray * rootArray = json_object_get_array_member(root, "preprocess"); + unsigned int elements = json_array_get_length(rootArray); + + std::map().swap(option); + // TODO: iterLayer should be the same with elements. 
+ auto iterLayer = layer.begin(); + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem, ++iterLayer) { + Options opt; + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + // normalization + if (json_object_has_member(pObject, "normalization")) { + JsonArray * array = json_object_get_array_member(pObject, "normalization"); + JsonNode * node = json_array_get_element(array, 0); + JsonObject * object = json_node_get_object(node); + + opt.normalization.use = true; + LOGI("use normalization"); + + JsonArray * arrayMean = json_object_get_array_member(object, "mean"); + JsonArray * arrayStd = json_object_get_array_member(object, "std"); + unsigned int elemMean = json_array_get_length(arrayMean); + unsigned int elemStd = json_array_get_length(arrayStd); + if (elemMean != elemStd) { + LOGE("Invalid mean and std values"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + for (unsigned int elem = 0; elem < elemMean; ++elem) { + auto m = static_cast(json_array_get_double_element(arrayMean, elem)); + auto s = static_cast(json_array_get_double_element(arrayStd, elem)); + opt.normalization.mean.push_back(m); + opt.normalization.std.push_back(s); + LOGI("%u: mean[%3.2f], std[%3.2f]", elem, m, s); + } + } + + if (json_object_has_member(pObject, "quantization")) { + JsonArray * array = json_object_get_array_member(pObject, "quantization"); + JsonNode * node = json_array_get_element(array, 0); + JsonObject * object = json_node_get_object(node); + + opt.quantization.use = true; + LOGI("use quantization"); + + JsonArray * arrayScale = json_object_get_array_member(object, "scale"); + JsonArray * arrayZero = json_object_get_array_member(object, "zeropoint"); + unsigned int elemScale = json_array_get_length(arrayScale); + unsigned int elemZero= json_array_get_length(arrayZero); + if (elemScale != elemZero) { + LOGE("Invalid scale and zero values"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; 
+				}
+
+				for (unsigned int elem = 0; elem < elemScale; ++elem) {
+					auto s = static_cast(json_array_get_double_element(arrayScale, elem));
+					auto z = static_cast(json_array_get_double_element(arrayZero, elem));
+					opt.quantization.scale.push_back(s);
+					opt.quantization.zeropoint.push_back(z);
+					LOGI("%u: scale[%3.2f], zeropoint[%3.2f]", elem, s, z);
+				}
+			}
+			option.insert(std::make_pair(iterLayer->first, opt));
+		}
+
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int InputMetadata::Parse(JsonObject *root)
+	{
+		LOGI("ENTER");
+
+		int ret = GetTensorInfo(root);
+		if (ret != MEDIA_VISION_ERROR_NONE) {
+			LOGE("Fail to GetTensorInfo[%d]", ret);
+			return ret;
+		}
+
+		ret = GetPreProcess(root);
+		if (ret != MEDIA_VISION_ERROR_NONE) {
+			LOGE("Fail to GetPreProcess[%d]", ret);
+			return ret;
+		}
+
+		parsed = true;
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int LayerInfo::GetWidth() const {
+		if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
+			return dims[3];
+		} else { // INFERENCE_TENSOR_SHAPE_NHWC
+			return dims[1];
+		}
+	}
+
+	int LayerInfo::GetHeight() const {
+		if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
+			return dims[2];
+		} else { // INFERENCE_TENSOR_SHAPE_NHWC
+			return dims[2];
+		}
+	}
+
+	int LayerInfo::GetChannel() const {
+		if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
+			return dims[1];
+		} else { // INFERENCE_TENSOR_SHAPE_NHWC
+			return dims[3];
+		}
+	}
+
+} /* Inference */
+} /* MediaVision */
diff --git a/mv_inference/inference/src/Metadata.cpp b/mv_inference/inference/src/Metadata.cpp
new file mode 100644
index 0000000..bb42557
--- /dev/null
+++ b/mv_inference/inference/src/Metadata.cpp
@@ -0,0 +1,121 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mv_private.h" +#include "Metadata.h" + +#include + +#include +#include +#include +#include +#include + +namespace mediavision +{ +namespace inference +{ + int Metadata::Init(const std::string& filename) + { + LOGI("ENTER"); + + if (access(filename.c_str(), F_OK | R_OK)) { + LOGE("meta file is in [%s] ", filename.c_str()); + return MEDIA_VISION_ERROR_INVALID_PATH; + } + + mMetafile = filename; + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int Metadata::Parse() + { + LOGI("ENTER"); + + if (mMetafile.empty()) { + LOGE("meta file is empty"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + GError *error = NULL; + JsonNode *node = NULL; + JsonObject *object = NULL; + int ret = MEDIA_VISION_ERROR_NONE; + + JsonParser *parser = json_parser_new(); + if (parser == NULL) { + LOGE("Fail to create json parser"); + return MEDIA_VISION_ERROR_OUT_OF_MEMORY; + } + + gboolean jsonRet = json_parser_load_from_file(parser, mMetafile.c_str(), &error); + if (!jsonRet) { + LOGE("Unable to parser file %s by %s", mMetafile.c_str(), + error == NULL ? "Unknown" : error->message); + g_error_free(error); + ret = MEDIA_VISION_ERROR_INVALID_DATA; + goto _ERROR_; + } + + node = json_parser_get_root(parser); + if (JSON_NODE_TYPE(node) != JSON_NODE_OBJECT) { + LOGE("Fail to json_parser_get_root. It's an incorrect markup"); + ret = MEDIA_VISION_ERROR_INVALID_DATA; + goto _ERROR_; + } + + object = json_node_get_object(node); + if (!object) { + LOGE("Fail to json_node_get_object. 
object is NULL"); + ret = MEDIA_VISION_ERROR_INVALID_DATA; + goto _ERROR_; + } + + ret = mInputMeta.Parse(json_object_get_object_member(object, "inputmetadata")); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to parse input Meta[%d]",ret); + goto _ERROR_; + } + + ret = mOutputMeta.Parse(json_object_get_object_member(object, "outputmetadata")); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to parse output meta[%d]",ret); + goto _ERROR_; + } + + _ERROR_ : + g_object_unref(parser); + parser = NULL; + LOGI("LEAVE"); + + return ret; + } + + const InputMetadata& Metadata::GetInputMeta() + { + return mInputMeta; + } + + const OutputMetadata& Metadata::GetOutputMeta() + { + return mOutputMeta; + } +} /* Inference */ +} /* MediaVision */ diff --git a/mv_inference/inference/src/OutputMetadata.cpp b/mv_inference/inference/src/OutputMetadata.cpp new file mode 100644 index 0000000..36fe1e7 --- /dev/null +++ b/mv_inference/inference/src/OutputMetadata.cpp @@ -0,0 +1,112 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mv_private.h" + +#include +#include +#include +#include +#include +#include "OutputMetadata.h" + +namespace mediavision +{ +namespace inference +{ + int OutputMetadata::GetScore(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "score") == false) { + LOGI("No score outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + // score + JsonArray * rootArray = json_object_get_array_member(root, "score"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + // FIXEME: ScoreInfo.set()?? + for (unsigned int elem = 0; elem < elements; ++elem) { + + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + score.name = + static_cast(json_object_get_string_member(pObject,"name")); + LOGI("layer: %s", score.name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + auto index = static_cast(json_array_get_int_element(array, elem2)); + score.dimInfo.index.push_back(index); + LOGI("%d", index); + } + + score.topNumber = static_cast(json_object_get_int_member(pObject, "top_number")); + LOGI("top number: %d", score.topNumber); + + score.threshold = static_cast(json_object_get_double_member(pObject, "threshold")); + LOGI("threshold: %1.3f", score.threshold); + + score.type = static_cast(json_object_get_int_member(pObject, "score_type")); + LOGI("score type: %d", score.type); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int OutputMetadata::Parse(JsonObject *root) + { + LOGI("ENTER"); + + int ret = GetScore(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetScore[%d]", ret); + return ret; + } + + parsed = true; + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int ScoreInfo::GetIndex() const + { 
+ LOGI("ENTER"); + + int ret = 0; + for (auto& index : dimInfo.index) { + if (index > 0) { + break; + } + ret++; + } + + LOGI("LEAVE"); + + return ret; + } +} /* Inference */ +} /* MediaVision */ diff --git a/mv_inference/inference/src/PostProcess.cpp b/mv_inference/inference/src/PostProcess.cpp new file mode 100644 index 0000000..a135097 --- /dev/null +++ b/mv_inference/inference/src/PostProcess.cpp @@ -0,0 +1,84 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mv_private.h" + +#include +#include +#include +#include +#include "PostProcess.h" + +namespace mediavision +{ +namespace inference +{ + float PostProcess::sigmoid(float value) + { + LOGI("ENTER"); + + LOGI("LEAVE"); + return 1.0/(1.0+ exp(-value)); + + } + + int PostProcess::ScoreClear(int size) + { + LOGI("ENTER"); + + std::priority_queue, + std::vector>, + std::greater>>().swap(mScore); + mMaxScoreSize = size; + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int PostProcess::ScorePush(float value, int index) + { + LOGI("ENTER"); + + mScore.push(std::pair(value, index)); + if (mScore.size() > mMaxScoreSize) { + mScore.pop(); + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int PostProcess::ScorePop(std::vector>& top) + { + LOGI("ENTER"); + + top.clear(); + while (mScore.empty() == false) { + top.push_back(mScore.top()); + LOGI("%.3f", mScore.top().first); + mScore.pop(); + } + + std::reverse(top.begin(), top.end()); + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } +} /* Inference */ +} /* MediaVision */ diff --git a/mv_inference/inference/src/PreProcess.cpp b/mv_inference/inference/src/PreProcess.cpp new file mode 100644 index 0000000..fa65ced --- /dev/null +++ b/mv_inference/inference/src/PreProcess.cpp @@ -0,0 +1,130 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mv_private.h" + +#include +#include +#include +#include +#include +#include "PreProcess.h" + +const int colorConvertTable[][12] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, -1, 0, 0, 0, 0, 0, 0, 0, cv::COLOR_GRAY2BGR565, cv::COLOR_GRAY2RGB, cv::COLOR_GRAY2RGBA }, + { 0, cv::COLOR_YUV2GRAY_I420, -1, 0, 0, 0, 0, 0, 0, 0, cv::COLOR_RGBA2GRAY, cv::COLOR_YUV2RGBA_I420 }, + { 0, cv::COLOR_YUV2GRAY_NV12, 0, -1, 0, 0, 0, 0, 0, 0, cv::COLOR_YUV2RGB_NV12, cv::COLOR_YUV2RGBA_NV12 }, + { 0, cv::COLOR_YUV2GRAY_YV12, 0, 0, -1, 0, 0, 0, 0, 0, cv::COLOR_YUV2RGB_YV12, cv::COLOR_YUV2RGBA_YV12 }, + { 0, cv::COLOR_YUV2GRAY_NV21, 0, 0, 0, -1, 0, 0, 0, 0, cv::COLOR_YUV2RGB_NV21, cv::COLOR_YUV2RGBA_NV21 }, + { 0, cv::COLOR_YUV2GRAY_YUYV, 0, 0, 0, 0, -1, 0, 0, 0, cv::COLOR_YUV2RGB_YUYV, cv::COLOR_YUV2RGBA_YUYV }, + { 0, cv::COLOR_YUV2GRAY_UYVY, 0, 0, 0, 0, 0, -1, 0, 0, cv::COLOR_YUV2BGR_UYVY, cv::COLOR_YUV2BGRA_UYVY }, + { 0, cv::COLOR_YUV2GRAY_Y422, 0, 0, 0, 0, 0, 0, -1, 0, cv::COLOR_YUV2RGB_Y422, cv::COLOR_YUV2RGBA_Y422 }, + { 0, cv::COLOR_BGR5652GRAY, 0, 0, 0, 0, 0, 0, 0, -1, cv::COLOR_BGR5652BGR, cv::COLOR_BGR5652BGRA }, + { 0, cv::COLOR_RGB2GRAY, 0, 0, 0, 0, 0, 0, 0, 0, -1, cv::COLOR_RGB2RGBA }, + { 0, cv::COLOR_RGBA2GRAY, 0, 0, 0, 0, 0, 0, 0, cv::COLOR_BGRA2BGR565, cv::COLOR_RGBA2RGB, -1} +}; + +namespace mediavision +{ +namespace inference +{ + int PreProcess::Resize(cv::Mat& source, cv::Mat& dest, cv::Size size) + { + LOGI("ENTER"); + + try { + cv::resize(source, dest, size); + } catch (cv::Exception& e) { + LOGE("Fail to resize with msg: %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + + int PreProcess::ColorConvert(cv::Mat& source, cv::Mat& dest, int sType, int dType) + { + LOGI("ENTER"); + + auto conversionColor = static_cast(colorConvertTable[sType][dType]); + if (conversionColor == -1) {/* Don't need conversion */ + dest = source; + } else if (conversionColor > 0) { + /* 
Converts the given image (as cv::Mat) to the target color space */
+			cv::cvtColor(source, dest, conversionColor);
+		} else {
+			LOGE("Fail to ColorConvert");
+			return MEDIA_VISION_ERROR_INVALID_OPERATION;
+		}
+
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int PreProcess::Normalize(cv::Mat& source, cv::Mat& dest,
+								const std::vector& mean, const std::vector& std)
+	{
+		LOGI("ENTER");
+		try {
+			cv::subtract(source, cv::Scalar(mean[0], mean[1], mean[2]), dest);
+			source = dest;
+			cv::divide(source, cv::Scalar(std[0], std[1], std[2]), dest);
+		} catch (cv::Exception& e) {
+			LOGE("Fail to substract/divide with msg: %s", e.what());
+			return MEDIA_VISION_ERROR_INVALID_OPERATION;
+		}
+
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int PreProcess::Run(cv::Mat& source, const int colorSpace,
+						const int dataType, const LayerInfo& layerInfo,
+						const Options& options, void* buffer)
+	{
+		LOGI("ENTER");
+
+		// dest is a wrapper of the buffer
+		cv::Mat dest(cv::Size(layerInfo.GetWidth(), layerInfo.GetHeight()),
+					dataType, buffer);
+
+		cv::Mat cvSource, cvDest;
+		// cvSource has new allocation with dest.size()
+		Resize(source, cvSource, dest.size());
+
+		// cvDest has a new allocation if its colorSpace is not RGB888;
+		// cvDest shares the data with cvSource if its colorSpace is RGB888
+		ColorConvert(cvSource, cvDest, colorSpace, layerInfo.colorSpace);
+
+		cvDest.convertTo(dest, dest.type());
+
+		if (options.normalization.use) {
+			Normalize(dest, dest, options.normalization.mean, options.normalization.std);
+		}
+
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+} /* Inference */
+} /* MediaVision */
diff --git a/mv_inference/inference/src/mv_inference_open.cpp b/mv_inference/inference/src/mv_inference_open.cpp
index c2011b4..1c4eb7e 100644
--- a/mv_inference/inference/src/mv_inference_open.cpp
+++ b/mv_inference/inference/src/mv_inference_open.cpp
@@ -127,6 +127,9 @@ int mv_inference_configure_model_open(mv_inference_h infer, int
backendType = 0;
 	size_t userFileLength = 0;
 
+	// TODO: a temporary variable; later, it should be removed.
+	std::string metaFilePath;
+
 	ret = mv_engine_config_get_string_attribute(
 			engine_config, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH,
 			&modelConfigFilePath);
@@ -195,6 +198,16 @@ int mv_inference_configure_model_open(mv_inference_h infer,
 	pInfer->ConfigureModelFiles(std::string(modelConfigFilePath),
 								std::string(modelWeightFilePath),
 								std::string(modelUserFilePath));
+	/* FIXME
+	 * temporary code lines to get a metafile, which has the same name
+	 * as modelWeightFilePath except the extension.
+	 * Later, it should get a metafile name and the below lines should be
+	 * removed.
+	 */
+	metaFilePath = std::string(modelWeightFilePath).substr(0,
+			std::string(modelWeightFilePath).find_last_of('.')) + ".json";
+	LOGI("metaFilePath: %s", metaFilePath.c_str());
+	pInfer->ParseMetadata(metaFilePath);
 
 _ERROR_:
 	if (modelConfigFilePath)
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec
index 03b9a3f..1d72b2d 100644
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -1,7 +1,7 @@
 Name:        capi-media-vision
 Summary:     Media Vision library for Tizen Native API
 Version:     0.7.0
-Release:     2
+Release:     3
 Group:       Multimedia/Framework
 License:     Apache-2.0 and BSD-3-Clause
 Source0:     %{name}-%{version}.tar.gz
-- 
2.7.4