From c9690688cc66286ef765d3115ec886d3f254ef9a Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Mon, 5 Apr 2021 17:31:48 +0900 Subject: [PATCH] Add dequantization to postprocess and fix bug when there is no preprocess Change-Id: I8bb520a7977ab61e9a8c4ecfc3349197a134b04a Signed-off-by: Tae-Young Chung --- meta-template/README.md | 56 ++++++++++++++++++++++ meta-template/image-classification-001-meta.json | 2 +- .../image-classification-quant-001-meta.json | 32 +++++++++++++ mv_inference/inference/include/Inference.h | 2 + mv_inference/inference/include/OutputMetadata.h | 12 +++++ mv_inference/inference/include/PostProcess.h | 1 + mv_inference/inference/src/Inference.cpp | 48 +++++++++++++++---- mv_inference/inference/src/InputMetadata.cpp | 4 +- mv_inference/inference/src/OutputMetadata.cpp | 10 ++++ mv_inference/inference/src/PostProcess.cpp | 10 +++- 10 files changed, 165 insertions(+), 12 deletions(-) create mode 100644 meta-template/image-classification-quant-001-meta.json diff --git a/meta-template/README.md b/meta-template/README.md index 3d5d69a..e6927d6 100644 --- a/meta-template/README.md +++ b/meta-template/README.md @@ -45,6 +45,7 @@ The Meta file consists of 1) inputmetadata and 2) outputmetadata. For example, a In the classification meta file, the `inputmetadata` includes `tensor_info` which has knowledge of an input tensor such as + - `name`: name to an input tensor - `shape_type`: shape type of the input tensor on `NCHW = 0` and `NHWC = 1` - `shape_dims`: shape dimensions based on the `shape_type` @@ -52,12 +53,14 @@ In the classification meta file, the `inputmetadata` includes - `color_space`: color space of the input tensor. 
`preprocess` which has information of preprocess such as + - `normalization`: nVal = (fVal - `mean`)/`std`, which nVal is normalized value and fVal is input value - `mean`: mean values of the input tensor - `std` : standard deviation values of the input tensor The `outputmetadata` includes `score` which has information of postprocess to get score such as + - `name`: name to an output tensor for score - `index`: index to get score from the output tensor - `top_number`: the top number of outputs @@ -66,3 +69,56 @@ The `outputmetadata` includes The classification meta file, thus, illustrates that the model has an input which is named of `input_2`, `NHWC` shape type with `[1, 224, 224, 3]` dimensions, `MV_INFERENCE_DATA_FLOAT32` data type, and `RGB888` color space. It requires normalization with mean `[127.5, 127.5, 127.5]` and standard deviation `[127.5, 127.5, 127.5]`. But it doesn't apply quantization. The meta file illustrates that the model has an ouput which is named of `dense_3/Softmax`. The tensor is 2-dimensional and its' 2nd index corresponds to the score. In addition, the score is just between 0 ~ 1. The score under `threshold` 0.3 should be thrown out and the `top_number` of outputs should be given as results. + +A meta file, however, for classification with a quantized model is shown below. + +``` +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input", + "shape_type" : 1, + "shape_dims" : [ 1, 224, 224, 3], + "data_type" : 1, + "color_space" : "RGB888" + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "MobilenetV1/Predictions/Reshape_1", + "index" : [-1, 1], + "top_number" : 5, + "threshold" : 0.3, + "score_type" : 0, + "dequantization" : [ + { + "scale" : 255.0, + "zeropoint" : 0.0 + } + ] + } + ] + } +} + +``` + +In the classification meta file, the `inputmetadata` includes +`tensor_info` only because it doesn't require any preprocess such as normalization and quantization. 
+ +But the `outputmetadata` includes an additional `dequantization` such as + +- `scale`: scale value (1.0 ~ 255.0) +- `zeropoint`: zeropoint value + +in `score`. You can get the real value `value` from the quantized value `value8`: + +* `value` = `value8` / `scale` + `zeropoint` + +The classification meta file, thus, illustrates that the model has an input which is named `input`, `NHWC` shape type with `[1, 224, 224, 3]` dimensions, `MV_INFERENCE_DATA_UINT8` data type, and `RGB888` color space. It doesn't require any preprocess. +The meta file illustrates that the model has an output which is named `MobilenetV1/Predictions/Reshape_1`. The tensor is 2-dimensional and its 2nd index corresponds to the score. In addition, the score is just between 0 ~ 1, but the value requires dequantization with scale and zeropoint values. The score after dequantizing under `threshold` 0.3 should be thrown out and the `top_number` of outputs should be given as results. diff --git a/meta-template/image-classification-001-meta.json b/meta-template/image-classification-001-meta.json index 5cb7bd5..151908c 100644 --- a/meta-template/image-classification-001-meta.json +++ b/meta-template/image-classification-001-meta.json @@ -27,7 +27,7 @@ { "name" : "dense_3/Softmax", "index" : [-1, 1], - "top_number" : 5, + "top_number" : 5, "threshold" : 0.3, "score_type" : 0 } diff --git a/meta-template/image-classification-quant-001-meta.json b/meta-template/image-classification-quant-001-meta.json new file mode 100644 index 0000000..9a44eab --- /dev/null +++ b/meta-template/image-classification-quant-001-meta.json @@ -0,0 +1,32 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input", + "shape_type" : 1, + "shape_dims" : [ 1, 224, 224, 3], + "data_type" : 1, + "color_space" : "RGB888" + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "MobilenetV1/Predictions/Reshape_1", + "index" : [-1, 1], + "top_number" : 5, + "threshold" : 0.3, + "score_type" : 0, + "dequantization" : [ + { + "scale" : 255.0, + 
"zeropoint" : 0.0 + } + ] + } + ] + } +} diff --git a/mv_inference/inference/include/Inference.h b/mv_inference/inference/include/Inference.h index c86e7e1..16816a8 100644 --- a/mv_inference/inference/include/Inference.h +++ b/mv_inference/inference/include/Inference.h @@ -377,6 +377,8 @@ namespace inference void CleanupTensorBuffers(void); int SetUserFile(std::string filename); int FillOutputResult(tensor_t &outputData); + + float getValFloat(inference_engine_tensor_buffer& buffer, int idx); }; } /* Inference */ diff --git a/mv_inference/inference/include/OutputMetadata.h b/mv_inference/inference/include/OutputMetadata.h index b687917..107f7c2 100644 --- a/mv_inference/inference/include/OutputMetadata.h +++ b/mv_inference/inference/include/OutputMetadata.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,16 @@ namespace inference std::vector index; }; + class DeQuantization + { + public: + double scale; + double zeropoint; + + DeQuantization(double s, double z) : scale(s), zeropoint(z) {}; + ~DeQuantization() = default; + }; + class ScoreInfo { public: @@ -48,6 +59,7 @@ namespace inference double threshold; int type; int topNumber; + std::unique_ptr deQuantization; public: ScoreInfo() = default; diff --git a/mv_inference/inference/include/PostProcess.h b/mv_inference/inference/include/PostProcess.h index 9f77a9f..b9b4cd6 100644 --- a/mv_inference/inference/include/PostProcess.h +++ b/mv_inference/inference/include/PostProcess.h @@ -62,6 +62,7 @@ namespace inference * @since_tizen 6.5 */ static float sigmoid(float value); + static float dequant(float value, float scale, float zeropoint); int ScoreClear(int size); int ScorePush(float value, int index); diff --git a/mv_inference/inference/src/Inference.cpp b/mv_inference/inference/src/Inference.cpp index 48acde8..60f70c3 100644 --- a/mv_inference/inference/src/Inference.cpp +++ b/mv_inference/inference/src/Inference.cpp @@ -389,7 +389,6 @@ namespace inference if (inputMeta.parsed) 
{ LOGI("use input meta"); auto& layerInfo = inputMeta.layer.begin()->second; - auto& option = inputMeta.option.begin()->second; if (layerInfo.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { // NCHW mConfig.mTensorInfo.ch = layerInfo.dims[1]; mConfig.mTensorInfo.dim = layerInfo.dims[0]; @@ -404,9 +403,12 @@ namespace inference LOGE("Invalid shape type[%d]", layerInfo.shapeType); } - if (option.normalization.use) { - mConfig.mMeanValue = option.normalization.mean[0]; - mConfig.mStdValue = option.normalization.std[0]; + if (!inputMeta.option.empty()) { + auto& option = inputMeta.option.begin()->second; + if (option.normalization.use) { + mConfig.mMeanValue = option.normalization.mean[0]; + mConfig.mStdValue = option.normalization.std[0]; + } } mConfig.mDataType = layerInfo.dataType; @@ -1136,7 +1138,7 @@ namespace inference for (auto& buffer : mInputTensorBuffers) { inference_engine_tensor_buffer& tensor_buffer = buffer.second; const LayerInfo& layerInfo = inputMeta.layer.at(buffer.first); - const Options& opt = inputMeta.option.at(buffer.first); + const Options& opt = inputMeta.option.empty() ? 
Options() : inputMeta.option.at(buffer.first); int data_type = ConvertToCv(tensor_buffer.data_type); @@ -1172,6 +1174,31 @@ namespace inference return mSupportedInferenceBackend[backend]; } + float Inference::getValFloat(inference_engine_tensor_buffer& buffer, int idx) + { + switch (buffer.data_type) + { + case INFERENCE_TENSOR_DATA_TYPE_FLOAT32: + return static_cast(buffer.buffer)[idx]; + case INFERENCE_TENSOR_DATA_TYPE_INT64: + return static_cast( + static_cast(buffer.buffer)[idx]); + case INFERENCE_TENSOR_DATA_TYPE_UINT32: + return static_cast( + static_cast(buffer.buffer)[idx]); + case INFERENCE_TENSOR_DATA_TYPE_UINT8: + return static_cast( + static_cast(buffer.buffer)[idx]); + case INFERENCE_TENSOR_DATA_TYPE_UINT16: + return static_cast( + static_cast(buffer.buffer)[idx]); + default: + break; + } + + return 0.0f; + } + int Inference::GetClassficationResults( ImageClassificationResults *classificationResults) { @@ -1183,16 +1210,21 @@ namespace inference int index = info.GetIndex(); int classes = mOutputLayerProperty.layers[info.name].shape[index]; - float *output = static_cast(mOutputTensorBuffers[info.name].buffer); - if (output == NULL) { + + if (mOutputTensorBuffers[info.name].buffer == NULL) { LOGE("output buffe is NULL"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } mPostProc.ScoreClear(info.topNumber); for (int cId = 0; cId < classes; ++cId) { - value = output[cId]; + value = getValFloat(mOutputTensorBuffers[info.name], cId); + if (info.deQuantization) { + value = PostProcess::dequant(value, + info.deQuantization->scale, + info.deQuantization->zeropoint); + } if (info.type == 1) { value = PostProcess::sigmoid(value); } diff --git a/mv_inference/inference/src/InputMetadata.cpp b/mv_inference/inference/src/InputMetadata.cpp index 4084328..28edbe3 100644 --- a/mv_inference/inference/src/InputMetadata.cpp +++ b/mv_inference/inference/src/InputMetadata.cpp @@ -107,8 +107,8 @@ namespace inference LOGI("ENTER"); if (json_object_has_member(root, "preprocess") 
== false) { - LOGE("No preprocess inputmetadata"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; + LOGI("No preprocess inputmetadata"); + return MEDIA_VISION_ERROR_NONE; } // preprocess diff --git a/mv_inference/inference/src/OutputMetadata.cpp b/mv_inference/inference/src/OutputMetadata.cpp index 36fe1e7..621b67b 100644 --- a/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_inference/inference/src/OutputMetadata.cpp @@ -69,6 +69,16 @@ namespace inference score.type = static_cast(json_object_get_int_member(pObject, "score_type")); LOGI("score type: %d", score.type); + + if (json_object_has_member(pObject, "dequantization")) { + array = json_object_get_array_member(pObject, "dequantization"); + JsonNode *node = json_array_get_element(array, 0); + JsonObject *object = json_node_get_object(node); + + score.deQuantization = std::make_unique( + json_object_get_double_member(object, "scale"), + json_object_get_double_member(object, "zeropoint")); + } } LOGI("LEAVE"); diff --git a/mv_inference/inference/src/PostProcess.cpp b/mv_inference/inference/src/PostProcess.cpp index a135097..f1faff9 100644 --- a/mv_inference/inference/src/PostProcess.cpp +++ b/mv_inference/inference/src/PostProcess.cpp @@ -35,6 +35,14 @@ namespace inference } + float PostProcess::dequant(float value, float scale, float zeropoint) + { + LOGI("ENTER"); + + LOGI("LEAVE"); + return value/scale + zeropoint; + } + int PostProcess::ScoreClear(int size) { LOGI("ENTER"); @@ -68,7 +76,7 @@ namespace inference LOGI("ENTER"); top.clear(); - while (mScore.empty() == false) { + while (!mScore.empty()) { top.push_back(mScore.top()); LOGI("%.3f", mScore.top().first); mScore.pop(); -- 2.7.4