From c9690688cc66286ef765d3115ec886d3f254ef9a Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Mon, 5 Apr 2021 17:31:48 +0900 Subject: [PATCH] Add dequantization to postprocess and fix bug when there is no preprocess Change-Id: I8bb520a7977ab61e9a8c4ecfc3349197a134b04a Signed-off-by: Tae-Young Chung --- meta-template/README.md | 56 ++++++++++++++++++++++ meta-template/image-classification-001-meta.json | 2 +- .../image-classification-quant-001-meta.json | 32 +++++++++++++ mv_inference/inference/include/Inference.h | 2 + mv_inference/inference/include/OutputMetadata.h | 12 +++++ mv_inference/inference/include/PostProcess.h | 1 + mv_inference/inference/src/Inference.cpp | 48 +++++++++++++++---- mv_inference/inference/src/InputMetadata.cpp | 4 +- mv_inference/inference/src/OutputMetadata.cpp | 10 ++++ mv_inference/inference/src/PostProcess.cpp | 10 +++- 10 files changed, 165 insertions(+), 12 deletions(-) create mode 100644 meta-template/image-classification-quant-001-meta.json diff --git a/meta-template/README.md b/meta-template/README.md index 3d5d69a..e6927d6 100644 --- a/meta-template/README.md +++ b/meta-template/README.md @@ -45,6 +45,7 @@ The Meta file consists of 1) inputmetadata and 2) outputmetadata. For example, a In the classification meta file, the `inputmetadata` includes `tensor_info` which has knowledge of an input tensor such as + - `name`: name to an input tensor - `shape_type`: shape type of the input tensor on `NCHW = 0` and `NHWC = 1` - `shape_dims`: shape dimensions based on the `shape_type` @@ -52,12 +53,14 @@ In the classification meta file, the `inputmetadata` includes - `color_space`: color space of the input tensor. 
`preprocess` which has information of preprocess such as + - `normalization`: nVal = (fVal - `mean`)/`std`, which nVal is normalized value and fVal is input value - `mean`: mean values of the input tensor - `std` : standard deviation values of the input tensor The `outputmetadata` includes `score` which has information of postprocess to get score such as + - `name`: name to an output tensor for score - `index`: index to get score from the output tensor - `top_number`: the top number of outputs @@ -66,3 +69,56 @@ The `outputmetadata` includes The classification meta file, thus, illustrates that the model has an input which is named of `input_2`, `NHWC` shape type with `[1, 224, 224, 3]` dimensions, `MV_INFERENCE_DATA_FLOAT32` data type, and `RGB888` color space. It requires normalization with mean `[127.5, 127.5, 127.5]` and standard deviation `[127.5, 127.5, 127.5]`. But it doesn't apply quantization. The meta file illustrates that the model has an ouput which is named of `dense_3/Softmax`. The tensor is 2-dimensional and its' 2nd index corresponds to the score. In addition, the score is just between 0 ~ 1. The score under `threshold` 0.3 should be thrown out and the `top_number` of outputs should be given as results. + +A meta file, however, for classification with a quantized model is shown below. + +``` +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input", + "shape_type" : 1, + "shape_dims" : [ 1, 224, 224, 3], + "data_type" : 1, + "color_space" : "RGB888" + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "MobilenetV1/Predictions/Reshape_1", + "index" : [-1, 1], + "top_number" : 5, + "threshold" : 0.3, + "score_type" : 0, + "dequantization" : [ + { + "scale" : 255.0, + "zeropoint" : 0.0 + } + ] + } + ] + } +} + +``` + +In the classification meta file, the `inputmetadata` includes +`tensor_info` only because it doesn't require any preprocess such as normalization and quantization. 
+ +But the `outputmetadata` includes an additional `dequantization` such as + +- `scale`: scale value (1.0 ~ 255.0) +- `zeropoint`: zeropoint value + +in `score`. You can get the real value `value` from the quantized value `value8`: + +* `value` = `value8` / `scale` + `zeropoint` + +The classification meta file, thus, illustrates that the model has an input which is named `input`, `NHWC` shape type with `[1, 224, 224, 3]` dimensions, `MV_INFERENCE_DATA_UINT8` data type, and `RGB888` color space. It doesn't require any preprocess. +The meta file illustrates that the model has an output which is named `MobilenetV1/Predictions/Reshape_1`. The tensor is 2-dimensional and its 2nd index corresponds to the score. In addition, the score is just between 0 ~ 1, but the value requires dequantization with scale and zeropoint values. The score after dequantizing under `threshold` 0.3 should be thrown out and the `top_number` of outputs should be given as results. diff --git a/meta-template/image-classification-001-meta.json b/meta-template/image-classification-001-meta.json index 5cb7bd5..151908c 100644 --- a/meta-template/image-classification-001-meta.json +++ b/meta-template/image-classification-001-meta.json @@ -27,7 +27,7 @@ { "name" : "dense_3/Softmax", "index" : [-1, 1], - "top_number" : 5, + "top_number" : 5, "threshold" : 0.3, "score_type" : 0 } diff --git a/meta-template/image-classification-quant-001-meta.json b/meta-template/image-classification-quant-001-meta.json new file mode 100644 index 0000000..9a44eab --- /dev/null +++ b/meta-template/image-classification-quant-001-meta.json @@ -0,0 +1,32 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input", + "shape_type" : 1, + "shape_dims" : [ 1, 224, 224, 3], + "data_type" : 1, + "color_space" : "RGB888" + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "MobilenetV1/Predictions/Reshape_1", + "index" : [-1, 1], + "top_number" : 5, + "threshold" : 0.3, + "score_type" : 0, + "dequantization" : [ + { + "scale" : 255.0, + 
"zeropoint" : 0.0 + } + ] + } + ] + } +} diff --git a/mv_inference/inference/include/Inference.h b/mv_inference/inference/include/Inference.h index c86e7e1..16816a8 100644 --- a/mv_inference/inference/include/Inference.h +++ b/mv_inference/inference/include/Inference.h @@ -377,6 +377,8 @@ namespace inference void CleanupTensorBuffers(void); int SetUserFile(std::string filename); int FillOutputResult(tensor_t &outputData); + + float getValFloat(inference_engine_tensor_buffer& buffer, int idx); }; } /* Inference */ diff --git a/mv_inference/inference/include/OutputMetadata.h b/mv_inference/inference/include/OutputMetadata.h index b687917..107f7c2 100644 --- a/mv_inference/inference/include/OutputMetadata.h +++ b/mv_inference/inference/include/OutputMetadata.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,16 @@ namespace inference std::vector index; }; + class DeQuantization + { + public: + double scale; + double zeropoint; + + DeQuantization(double s, double z) : scale(s), zeropoint(z) {}; + ~DeQuantization() = default; + }; + class ScoreInfo { public: @@ -48,6 +59,7 @@ namespace inference double threshold; int type; int topNumber; + std::unique_ptr deQuantization; public: ScoreInfo() = default; diff --git a/mv_inference/inference/include/PostProcess.h b/mv_inference/inference/include/PostProcess.h index 9f77a9f..b9b4cd6 100644 --- a/mv_inference/inference/include/PostProcess.h +++ b/mv_inference/inference/include/PostProcess.h @@ -62,6 +62,7 @@ namespace inference * @since_tizen 6.5 */ static float sigmoid(float value); + static float dequant(float value, float scale, float zeropoint); int ScoreClear(int size); int ScorePush(float value, int index); diff --git a/mv_inference/inference/src/Inference.cpp b/mv_inference/inference/src/Inference.cpp index 48acde8..60f70c3 100644 --- a/mv_inference/inference/src/Inference.cpp +++ b/mv_inference/inference/src/Inference.cpp @@ -389,7 +389,6 @@ namespace inference if (inputMeta.parsed) 
{ LOGI("use input meta"); auto& layerInfo = inputMeta.layer.begin()->second; - auto& option = inputMeta.option.begin()->second; if (layerInfo.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { // NCHW mConfig.mTensorInfo.ch = layerInfo.dims[1]; mConfig.mTensorInfo.dim = layerInfo.dims[0]; @@ -404,9 +403,12 @@ namespace inference LOGE("Invalid shape type[%d]", layerInfo.shapeType); } - if (option.normalization.use) { - mConfig.mMeanValue = option.normalization.mean[0]; - mConfig.mStdValue = option.normalization.std[0]; + if (!inputMeta.option.empty()) { + auto& option = inputMeta.option.begin()->second; + if (option.normalization.use) { + mConfig.mMeanValue = option.normalization.mean[0]; + mConfig.mStdValue = option.normalization.std[0]; + } } mConfig.mDataType = layerInfo.dataType; @@ -1136,7 +1138,7 @@ namespace inference for (auto& buffer : mInputTensorBuffers) { inference_engine_tensor_buffer& tensor_buffer = buffer.second; const LayerInfo& layerInfo = inputMeta.layer.at(buffer.first); - const Options& opt = inputMeta.option.at(buffer.first); + const Options& opt = inputMeta.option.empty() ? 
Options() : inputMeta.option.at(buffer.first); int data_type = ConvertToCv(tensor_buffer.data_type); @@ -1172,6 +1174,31 @@ namespace inference return mSupportedInferenceBackend[backend]; } + float Inference::getValFloat(inference_engine_tensor_buffer& buffer, int idx) + { + switch (buffer.data_type) + { + case INFERENCE_TENSOR_DATA_TYPE_FLOAT32: + return static_cast(buffer.buffer)[idx]; + case INFERENCE_TENSOR_DATA_TYPE_INT64: + return static_cast( + static_cast(buffer.buffer)[idx]); + case INFERENCE_TENSOR_DATA_TYPE_UINT32: + return static_cast( + static_cast(buffer.buffer)[idx]); + case INFERENCE_TENSOR_DATA_TYPE_UINT8: + return static_cast( + static_cast(buffer.buffer)[idx]); + case INFERENCE_TENSOR_DATA_TYPE_UINT16: + return static_cast( + static_cast(buffer.buffer)[idx]); + default: + break; + } + + return 0.0f; + } + int Inference::GetClassficationResults( ImageClassificationResults *classificationResults) { @@ -1183,16 +1210,21 @@ namespace inference int index = info.GetIndex(); int classes = mOutputLayerProperty.layers[info.name].shape[index]; - float *output = static_cast(mOutputTensorBuffers[info.name].buffer); - if (output == NULL) { + + if (mOutputTensorBuffers[info.name].buffer == NULL) { LOGE("output buffe is NULL"); return MEDIA_VISION_ERROR_INVALID_OPERATION; } mPostProc.ScoreClear(info.topNumber); for (int cId = 0; cId < classes; ++cId) { - value = output[cId]; + value = getValFloat(mOutputTensorBuffers[info.name], cId); + if (info.deQuantization) { + value = PostProcess::dequant(value, + info.deQuantization->scale, + info.deQuantization->zeropoint); + } if (info.type == 1) { value = PostProcess::sigmoid(value); } diff --git a/mv_inference/inference/src/InputMetadata.cpp b/mv_inference/inference/src/InputMetadata.cpp index 4084328..28edbe3 100644 --- a/mv_inference/inference/src/InputMetadata.cpp +++ b/mv_inference/inference/src/InputMetadata.cpp @@ -107,8 +107,8 @@ namespace inference LOGI("ENTER"); if (json_object_has_member(root, "preprocess") 
== false) { - LOGE("No preprocess inputmetadata"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; + LOGI("No preprocess inputmetadata"); + return MEDIA_VISION_ERROR_NONE; } // preprocess diff --git a/mv_inference/inference/src/OutputMetadata.cpp b/mv_inference/inference/src/OutputMetadata.cpp index 36fe1e7..621b67b 100644 --- a/mv_inference/inference/src/OutputMetadata.cpp +++ b/mv_inference/inference/src/OutputMetadata.cpp @@ -69,6 +69,16 @@ namespace inference score.type = static_cast(json_object_get_int_member(pObject, "score_type")); LOGI("score type: %d", score.type); + + if (json_object_has_member(pObject, "dequantization")) { + array = json_object_get_array_member(pObject, "dequantization"); + JsonNode *node = json_array_get_element(array, 0); + JsonObject *object = json_node_get_object(node); + + score.deQuantization = std::make_unique( + json_object_get_double_member(object, "scale"), + json_object_get_double_member(object, "zeropoint")); + } } LOGI("LEAVE"); diff --git a/mv_inference/inference/src/PostProcess.cpp b/mv_inference/inference/src/PostProcess.cpp index a135097..f1faff9 100644 --- a/mv_inference/inference/src/PostProcess.cpp +++ b/mv_inference/inference/src/PostProcess.cpp @@ -35,6 +35,14 @@ namespace inference } + float PostProcess::dequant(float value, float scale, float zeropoint) + { + LOGI("ENTER"); + + LOGI("LEAVE"); + return value/scale + zeropoint; + } + int PostProcess::ScoreClear(int size) { LOGI("ENTER"); @@ -68,7 +76,7 @@ namespace inference LOGI("ENTER"); top.clear(); - while (mScore.empty() == false) { + while (!mScore.empty()) { top.push_back(mScore.top()); LOGI("%.3f", mScore.top().first); mScore.pop(); -- 2.7.4