From 3f9233f26ef8b41162391e7049427931f5f2bf0d Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Wed, 17 Mar 2021 16:07:36 +0900 Subject: [PATCH] Add Metadata, PreProcess, PostProcess class Introduce metadata based pre-/post-process. User can set the input/output information of a model to a metadata file(json file) Based on information, input tensor and output tensor are preprocessed and postprocessed, respectively. Metadata class with InputMetadata and OutputMetadata are added. PreProcess and PostProcess class are added. A meta file for classification is added as an example. It can be used as an referece for classification case. Change-Id: Ie86b50ae32bf2f7f5d336fe4709d6ea938930c3d Signed-off-by: Tae-Young Chung --- meta-template/README.md | 68 ++++++ meta-template/image-classification-001-meta.json | 36 +++ mv_inference/inference/CMakeLists.txt | 4 +- mv_inference/inference/include/Inference.h | 14 ++ mv_inference/inference/include/InputMetadata.h | 126 ++++++++++ mv_inference/inference/include/Metadata.h | 90 +++++++ mv_inference/inference/include/OutputMetadata.h | 93 ++++++++ mv_inference/inference/include/PostProcess.h | 82 +++++++ mv_inference/inference/include/PreProcess.h | 77 ++++++ mv_inference/inference/src/Inference.cpp | 290 ++++++++++++++++------- mv_inference/inference/src/InputMetadata.cpp | 234 ++++++++++++++++++ mv_inference/inference/src/Metadata.cpp | 121 ++++++++++ mv_inference/inference/src/OutputMetadata.cpp | 112 +++++++++ mv_inference/inference/src/PostProcess.cpp | 84 +++++++ mv_inference/inference/src/PreProcess.cpp | 130 ++++++++++ mv_inference/inference/src/mv_inference_open.cpp | 13 + packaging/capi-media-vision.spec | 2 +- 17 files changed, 1492 insertions(+), 84 deletions(-) create mode 100644 meta-template/README.md create mode 100644 meta-template/image-classification-001-meta.json create mode 100644 mv_inference/inference/include/InputMetadata.h create mode 100644 mv_inference/inference/include/Metadata.h create mode 100644 
mv_inference/inference/include/OutputMetadata.h create mode 100644 mv_inference/inference/include/PostProcess.h create mode 100644 mv_inference/inference/include/PreProcess.h create mode 100644 mv_inference/inference/src/InputMetadata.cpp create mode 100644 mv_inference/inference/src/Metadata.cpp create mode 100644 mv_inference/inference/src/OutputMetadata.cpp create mode 100644 mv_inference/inference/src/PostProcess.cpp create mode 100644 mv_inference/inference/src/PreProcess.cpp diff --git a/meta-template/README.md b/meta-template/README.md new file mode 100644 index 0000000..3d5d69a --- /dev/null +++ b/meta-template/README.md @@ -0,0 +1,68 @@ +Introduce Meta file based Inference. + +A Meta file provides a model description. The Meta file is an important source of knowledge about how to run models while properly applying preprocess to input and postprocess to output. + +The Meta file consists of 1) inputmetadata and 2) outputmetadata. For example, a meta file for classification is shown below. + +``` +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input_2", + "shape_type" : 1, + "shape_dims" : [ 1, 224, 224, 3], + "data_type" : 0, + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [127.5, 127.5, 127.5], + "std" : [127.5, 127.5, 127.5] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "dense_3/Softmax", + "index" : [-1, 1], + "top_number" : 5, + "threshold" : 0.3, + "score_type" : 0 + } + ] + } +} +``` + +In the classification meta file, the `inputmetadata` includes +`tensor_info` which has knowledge of an input tensor such as +- `name`: name to an input tensor +- `shape_type`: shape type of the input tensor on `NCHW = 0` and `NHWC = 1` +- `shape_dims`: shape dimensions based on the `shape_type` +- `data_type`: data type of the input tensor on `MV_INFERENCE_DATA_FLOAT32 = 0` and `MV_INFERENCE_DATA_UINT8 = 1` +- `color_space`: color space of the input tensor. 
+
+`preprocess` which has information of preprocess such as
+- `normalization`: nVal = (fVal - `mean`)/`std`, where nVal is the normalized value and fVal is the input value
+ - `mean`: mean values of the input tensor
+ - `std` : standard deviation values of the input tensor
+
+The `outputmetadata` includes
+`score` which has information of postprocess to get score such as
+- `name`: name of an output tensor for score
+- `index`: index to get score from the output tensor
+- `top_number`: the top number of outputs
+- `threshold` : threshold to cut outputs under the `threshold` value
+- `score_type` : 0 if the score is already between 0 ~ 1, 1 if the score requires sigmoid
+
+The classification meta file, thus, illustrates that the model has an input which is named `input_2`, with `NHWC` shape type, `[1, 224, 224, 3]` dimensions, `MV_INFERENCE_DATA_FLOAT32` data type, and `RGB888` color space. It requires normalization with mean `[127.5, 127.5, 127.5]` and standard deviation `[127.5, 127.5, 127.5]`. But it doesn't apply quantization.
+The meta file illustrates that the model has an output named `dense_3/Softmax`. The tensor is 2-dimensional and its 2nd index corresponds to the score. In addition, the score is already between 0 ~ 1. Any score under `threshold` 0.3 should be thrown out and the `top_number` of outputs should be given as results.
diff --git a/meta-template/image-classification-001-meta.json b/meta-template/image-classification-001-meta.json new file mode 100644 index 0000000..5cb7bd5 --- /dev/null +++ b/meta-template/image-classification-001-meta.json @@ -0,0 +1,36 @@ +{ + "inputmetadata" : + { + "tensor_info" : [ + { + "name" : "input_2", + "shape_type" : 1, + "shape_dims" : [ 1, 224, 224, 3], + "data_type" : 0, + "color_space" : "RGB888" + } + ], + "preprocess" : [ + { + "normalization" : [ + { + "mean" : [127.5, 127.5, 127.5], + "std" : [127.5, 127.5, 127.5] + } + ] + } + ] + }, + "outputmetadata" : + { + "score" : [ + { + "name" : "dense_3/Softmax", + "index" : [-1, 1], + "top_number" : 5, + "threshold" : 0.3, + "score_type" : 0 + } + ] + } +} diff --git a/mv_inference/inference/CMakeLists.txt b/mv_inference/inference/CMakeLists.txt index c7f4903..05de57c 100644 --- a/mv_inference/inference/CMakeLists.txt +++ b/mv_inference/inference/CMakeLists.txt @@ -11,7 +11,7 @@ INCLUDE_DIRECTORIES("${INC_DIR}") INCLUDE_DIRECTORIES("${PROJECT_SOURCE_DIR}/include") INCLUDE_DIRECTORIES("${PROJECT_SOURCE_DIR}/src") -SET(dependents "inference-engine-interface-common iniparser") +SET(dependents "inference-engine-interface-common iniparser json-glib-1.0") INCLUDE(FindPkgConfig) pkg_check_modules(${fw_name} REQUIRED ${dependents}) FOREACH(flag ${${fw_name}_CFLAGS}) @@ -39,6 +39,6 @@ else() ADD_LIBRARY(${PROJECT_NAME} SHARED ${MV_INFERENCE_INCLUDE_LIST} ${MV_INFERENCE_SOURCE_LIST}) endif() -TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${MV_COMMON_LIB_NAME} ${OpenCV_LIBS} inference-engine-interface-common dlog iniparser) +TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${MV_COMMON_LIB_NAME} ${OpenCV_LIBS} inference-engine-interface-common dlog iniparser json-glib-1.0) INSTALL(TARGETS ${PROJECT_NAME} DESTINATION ${LIB_INSTALL_DIR}) diff --git a/mv_inference/inference/include/Inference.h b/mv_inference/inference/include/Inference.h index 997e476..c86e7e1 100644 --- a/mv_inference/inference/include/Inference.h +++ 
b/mv_inference/inference/include/Inference.h @@ -27,6 +27,9 @@ #include #include #include +#include "Metadata.h" +#include "PreProcess.h" +#include "PostProcess.h" #define HUMAN_POSE_MAX_LANDMARKS 16 #define HUMAN_POSE_MAX_PARTS 6 @@ -201,6 +204,13 @@ namespace inference void ConfigureThreshold(const double threshold); /** + * @brief Parses the metadata file path + * + * @since_tizen 6.5 + */ + int ParseMetadata(const std::string filePath); + + /** * @brief Bind a backend engine * @details Use this function to bind a backend engine for the inference. * This creates a inference engine common class object, and loads a backend @@ -352,6 +362,10 @@ namespace inference mv_inference_pose_s *mPoseResult; + Metadata mMetadata; + PreProcess mPreProc; + PostProcess mPostProc; + private: void CheckSupportedInferenceBackend(); int ConvertEngineErrorToVisionError(int error); diff --git a/mv_inference/inference/include/InputMetadata.h b/mv_inference/inference/include/InputMetadata.h new file mode 100644 index 0000000..8b722c7 --- /dev/null +++ b/mv_inference/inference/include/InputMetadata.h @@ -0,0 +1,126 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MEDIA_VISION_INPUTMETADATA_H__ +#define __MEDIA_VISION_INPUTMETADATA_H__ + +#include +#include + +#include +#include +#include + +/** + * @file InputMetadata.h + * @brief This file contains the metadata class definition which + * provides metadata of a model. + */ + +namespace mediavision +{ +namespace inference +{ + class Options + { + public: + class Normalization + { + public: + bool use; + std::vector mean; + std::vector std; + + Normalization() : use(false) {} + ~Normalization() = default; + }; + + class Quantization + { + public: + bool use; + std::vector scale; + std::vector zeropoint; + + Quantization() : use(false) {}; + ~Quantization() = default; + }; + + Normalization normalization; + Quantization quantization; + + Options() = default; + ~Options() = default; + }; + + class LayerInfo + { + public: + + std::string name; + std::vector dims; + mv_colorspace_e colorSpace; + mv_inference_data_type_e dataType; + inference_tensor_shape_type_e shapeType; // TODO: define mv_inference_shape_type_e + + LayerInfo() = default; + ~LayerInfo() = default; + + int GetWidth() const; + int GetHeight() const; + int GetChannel() const; + }; + + class InputMetadata + { + public: + bool parsed; + std::map layer; + std::map option; + + /** + * @brief Creates an InputMetadata class instance. + * + * @since_tizen 6.5 + */ + InputMetadata() : parsed(false) {}; + + /** + * @brief Destroys an InputMetadata class instance including + * its all resources. 
+ * + * @since_tizen 6.5 + */ + ~InputMetadata() = default; + + /** + * @brief Parses an InputMetadata + * + * @since_tizen 6.5 + */ + int Parse(JsonObject *root); + + private: + int GetTensorInfo(JsonObject* root); + int GetPreProcess(JsonObject* root); + mv_colorspace_e ConvertTypeToMD(const std::string& type); + + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_INPUTMETADATA_H__ */ diff --git a/mv_inference/inference/include/Metadata.h b/mv_inference/inference/include/Metadata.h new file mode 100644 index 0000000..322fd3e --- /dev/null +++ b/mv_inference/inference/include/Metadata.h @@ -0,0 +1,90 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_METADATA_H__ +#define __MEDIA_VISION_METADATA_H__ + +#include +#include + +#include "mv_common.h" +#include "mv_inference_private.h" +#include +#include "InputMetadata.h" +#include "OutputMetadata.h" +#include + +/** + * @file Metadata.h + * @brief This file contains the metadata class definition which + * provides metadata of a model. + */ + +namespace mediavision +{ +namespace inference +{ + class Metadata + { + public: + /** + * @brief Creates an Metadata class instance. + * + * @since_tizen 6.5 + */ + Metadata() = default; + + /** + * @brief Destroys an Metadata class instance including + * its all resources. 
+ * + * @since_tizen 6.5 + */ + ~Metadata() = default; + + /** + * @brief Initializes an Metadata class + * + * @since_tizen 6.5 + */ + int Init(const std::string& filename); + + /** + * @brief Parses a metafile and set values to InputMetadata + * and OutputMetadata + * + * @since_tizen 6.5 + */ + int Parse(); + + const InputMetadata& GetInputMeta(); + const OutputMetadata& GetOutputMeta(); + + private: + int ParseInputMeta(JsonObject *object); + int ParseOutputMeta(JsonObject *object); + + private: + std::string mMetafile; + + InputMetadata mInputMeta; + OutputMetadata mOutputMeta; + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_METADATA_H__ */ diff --git a/mv_inference/inference/include/OutputMetadata.h b/mv_inference/inference/include/OutputMetadata.h new file mode 100644 index 0000000..b687917 --- /dev/null +++ b/mv_inference/inference/include/OutputMetadata.h @@ -0,0 +1,93 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_OUTPUTMETADATA_H__ +#define __MEDIA_VISION_OUTPUTMETADATA_H__ + +#include +#include +#include + +#include +#include + +/** + * @file OutputMetadata.h + * @brief This file contains the metadata class definition which + * provides metadata of a model. 
+ */ + +namespace mediavision +{ +namespace inference +{ + class DimInfo + { + public: + std::vector index; + }; + + class ScoreInfo + { + public: + std::string name; + DimInfo dimInfo; + double threshold; + int type; + int topNumber; + + public: + ScoreInfo() = default; + ~ScoreInfo() = default; + int GetIndex() const; + }; + + class OutputMetadata + { + public: + bool parsed; + ScoreInfo score; + + /** + * @brief Creates an OutputMetadata class instance. + * + * @since_tizen 6.5 + */ + OutputMetadata() : parsed(false) {}; + + /** + * @brief Destroys an OutputMetadata class instance including + * its all resources. + * + * @since_tizen 6.5 + */ + ~OutputMetadata() = default; + + /** @brief Parses an OutputMetadata + * + * @since_tizen 6.5 + */ + int Parse(JsonObject *root); + + private: + int GetScore(JsonObject *root); + + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_OUTPUTMETADATA_H__ */ diff --git a/mv_inference/inference/include/PostProcess.h b/mv_inference/inference/include/PostProcess.h new file mode 100644 index 0000000..9f77a9f --- /dev/null +++ b/mv_inference/inference/include/PostProcess.h @@ -0,0 +1,82 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MEDIA_VISION_POSTPROCESS_H__ +#define __MEDIA_VISION_POSTPROCESS_H__ + +#include +#include +#include + +#include "mv_common.h" +#include "OutputMetadata.h" + +#include +#include + + +/** + * @file PostProcess.h + * @brief This file contains the PostProcess class definition which + * provides PostProcess after running inference. + */ + +namespace mediavision +{ +namespace inference +{ + class PostProcess + { + public: + /** + * @brief Creates an PostProcess class instance. + * + * @since_tizen 6.5 + */ + PostProcess() : mMaxScoreSize(3) {}; + + /** + * @brief Destroys an PostProcess class instance including + * its all resources. + * + * @since_tizen 6.5 + */ + ~PostProcess() = default; + + /** + * @brief Calculates sigmoid. + * + * @since_tizen 6.5 + */ + static float sigmoid(float value); + + int ScoreClear(int size); + int ScorePush(float value, int index); + int ScorePop(std::vector>& top); + + private: + std::priority_queue, + std::vector>, + std::greater>> mScore; + private: + int mMaxScoreSize; + + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_POSTPROCESS_H__ */ diff --git a/mv_inference/inference/include/PreProcess.h b/mv_inference/inference/include/PreProcess.h new file mode 100644 index 0000000..f4c002b --- /dev/null +++ b/mv_inference/inference/include/PreProcess.h @@ -0,0 +1,77 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_PREPROCESS_H__ +#define __MEDIA_VISION_PREPORCESS_H__ + +#include +#include + +#include "mv_common.h" +#include "InputMetadata.h" + +#include +#include + + +/** + * @file PreProcess.h + * @brief This file contains the PreProcess class definition which + * provides PreProcess before running inference. + */ + +namespace mediavision +{ +namespace inference +{ + class PreProcess + { + public: + /** + * @brief Creates an PreProcess class instance. + * + * @since_tizen 6.5 + */ + PreProcess() = default; + + /** + * @brief Destroys an PreProcess class instance including + * its all resources. + * + * @since_tizen 6.5 + */ + ~PreProcess() = default; + + /** + * @brief Runs PreProcess with layerInfo and options + * + * @since_tizen 6.5 + */ + int Run(cv::Mat& source, const int colorSpace, const int dataType, const LayerInfo& layerInfo, + const Options& options, void* buffer); + + private: + int Resize(cv::Mat& source, cv::Mat& dest, cv::Size size); + int ColorConvert(cv::Mat& source, cv::Mat& dest, int sType, int dType); + int Normalize(cv::Mat& source, cv::Mat& dest, + const std::vector& mean, const std::vector& std); + + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_PREPROCESS_H__ */ diff --git a/mv_inference/inference/src/Inference.cpp b/mv_inference/inference/src/Inference.cpp index fa1c5c5..48acde8 100644 --- a/mv_inference/inference/src/Inference.cpp +++ b/mv_inference/inference/src/Inference.cpp @@ -79,7 +79,10 @@ namespace inference mInputBuffer(cv::Mat()), engine_config(), mBackend(), - mPoseResult(NULL) + mPoseResult(NULL), + mMetadata(), + mPreProc(), + mPostProc() { LOGI("ENTER"); @@ -375,35 +378,77 @@ namespace inference { LOGI("ENTER"); + // FIXME: mConfig should be removed mConfig.mTensorInfo = { width, height, dim, ch }; mConfig.mStdValue = stdValue; mConfig.mMeanValue = meanValue; 
mConfig.mDataType = static_cast(dataType); mConfig.mInputLayerNames = names; + const InputMetadata& inputMeta = mMetadata.GetInputMeta(); + if (inputMeta.parsed) { + LOGI("use input meta"); + auto& layerInfo = inputMeta.layer.begin()->second; + auto& option = inputMeta.option.begin()->second; + if (layerInfo.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { // NCHW + mConfig.mTensorInfo.ch = layerInfo.dims[1]; + mConfig.mTensorInfo.dim = layerInfo.dims[0]; + mConfig.mTensorInfo.width = layerInfo.dims[3]; + mConfig.mTensorInfo.height = layerInfo.dims[2]; + } else if (layerInfo.shapeType == INFERENCE_TENSOR_SHAPE_NHWC) {// NHWC + mConfig.mTensorInfo.ch = layerInfo.dims[3]; + mConfig.mTensorInfo.dim = layerInfo.dims[0]; + mConfig.mTensorInfo.width = layerInfo.dims[2]; + mConfig.mTensorInfo.height = layerInfo.dims[1]; + } else { + LOGE("Invalid shape type[%d]", layerInfo.shapeType); + } + + if (option.normalization.use) { + mConfig.mMeanValue = option.normalization.mean[0]; + mConfig.mStdValue = option.normalization.std[0]; + } + + mConfig.mDataType = layerInfo.dataType; + mConfig.mInputLayerNames.clear(); + for (auto& layer : inputMeta.layer) { + mConfig.mInputLayerNames.push_back(layer.first); + } + } + inference_engine_layer_property property; // In case of that a inference plugin deosn't support to get properties, // the tensor info given by a user will be used. // If the plugin supports that, the given info will be ignored. 
- inference_engine_tensor_info tensor_info; - - tensor_info.data_type = ConvertToIE(dataType); - - // In case of OpenCV, only supports NCHW - tensor_info.shape_type = INFERENCE_TENSOR_SHAPE_NCHW; - // modify to handle multiple tensor infos - tensor_info.shape.push_back(mConfig.mTensorInfo.dim); - tensor_info.shape.push_back(mConfig.mTensorInfo.ch); - tensor_info.shape.push_back(mConfig.mTensorInfo.height); - tensor_info.shape.push_back(mConfig.mTensorInfo.width); - tensor_info.size = 1; - for (auto& dim : tensor_info.shape) { - tensor_info.size *= dim; - } + for (auto& layer : inputMeta.layer) { + inference_engine_tensor_info tensor_info; + if (inputMeta.parsed) { + tensor_info.data_type = ConvertToIE(layer.second.dataType); - for (auto& layerName : mConfig.mInputLayerNames) { - property.layers.insert(std::make_pair(layerName, tensor_info)); + tensor_info.shape_type = layer.second.shapeType; + tensor_info.size = 1; + for (auto& dim : layer.second.dims) { + tensor_info.shape.push_back(dim); + tensor_info.size *= dim; + } + } else { + tensor_info.data_type = ConvertToIE(dataType); + + // In case of OpenCV, only supports NCHW + tensor_info.shape_type = INFERENCE_TENSOR_SHAPE_NCHW; + // modify to handle multiple tensor infos + tensor_info.shape.push_back(mConfig.mTensorInfo.dim); + tensor_info.shape.push_back(mConfig.mTensorInfo.ch); + tensor_info.shape.push_back(mConfig.mTensorInfo.height); + tensor_info.shape.push_back(mConfig.mTensorInfo.width); + + tensor_info.size = 1; + for (auto& dim : tensor_info.shape) { + tensor_info.size *= dim; + } + } + property.layers.insert(std::make_pair(layer.first, tensor_info)); } int ret = mBackend->SetInputLayerProperty(property); @@ -542,6 +587,26 @@ namespace inference MV_INFERENCE_CONFIDENCE_THRESHOLD_MIN); } + int Inference::ParseMetadata(const std::string filePath) + { + LOGI("ENTER"); + int ret = mMetadata.Init(filePath); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to init metadata[%d]", ret); + return ret; + } + + 
ret = mMetadata.Parse(); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to parse metadata[%d]", ret); + return ret; + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + void Inference::CleanupTensorBuffers(void) { LOGI("ENTER"); @@ -1066,19 +1131,33 @@ namespace inference return MEDIA_VISION_ERROR_INVALID_PARAMETER; } - for (auto& buffer : mInputTensorBuffers) { - inference_engine_tensor_buffer& tensor_buffer = buffer.second; + const InputMetadata& inputMeta = mMetadata.GetInputMeta(); + if (inputMeta.parsed) { + for (auto& buffer : mInputTensorBuffers) { + inference_engine_tensor_buffer& tensor_buffer = buffer.second; + const LayerInfo& layerInfo = inputMeta.layer.at(buffer.first); + const Options& opt = inputMeta.option.at(buffer.first); - int data_type = ConvertToCv(tensor_buffer.data_type); + int data_type = ConvertToCv(tensor_buffer.data_type); - // Convert color space of input tensor data and then normalize it. - ret = Preprocess(cvSource, - cv::Mat(mInputSize.height, mInputSize.width, - data_type, tensor_buffer.buffer), - data_type); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to preprocess input tensor data."); - return ret; + ret = mPreProc.Run(cvSource, colorspace, data_type, layerInfo, opt, tensor_buffer.buffer); + } + } else { + for (auto& buffer : mInputTensorBuffers) { + inference_engine_tensor_buffer& tensor_buffer = buffer.second; + + int data_type = ConvertToCv(tensor_buffer.data_type); + + // Convert color space of input tensor data and then normalize it. 
+ + ret = Preprocess(cvSource, + cv::Mat(mInputSize.height, mInputSize.width, + data_type, tensor_buffer.buffer), + data_type); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to preprocess input tensor data."); + return ret; + } } } @@ -1096,71 +1175,120 @@ namespace inference int Inference::GetClassficationResults( ImageClassificationResults *classificationResults) { - tensor_t outputData; + const OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + if (outputMeta.parsed) { + std::vector> topScore; + float value = 0.0f; + auto& info = outputMeta.score; + + int index = info.GetIndex(); + int classes = mOutputLayerProperty.layers[info.name].shape[index]; + float *output = static_cast(mOutputTensorBuffers[info.name].buffer); + if (output == NULL) { + LOGE("output buffe is NULL"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } - // Get inference result and contain it to outputData. - int ret = FillOutputResult(outputData); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get output result."); - return ret; - } + mPostProc.ScoreClear(info.topNumber); + for (int cId = 0; cId < classes; ++cId) { + value = output[cId]; - // Will contain top N results in ascending order. 
- std::vector > top_results; - std::priority_queue, - std::vector >, - std::greater > > - top_result_pq; - float value = 0.0f; + if (info.type == 1) { + value = PostProcess::sigmoid(value); + } - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); + if (value < info.threshold) + continue; - int count = inferDimInfo[0][1]; - LOGI("count: %d", count); + LOGI("id[%d]: %.3f", cId, value); + mPostProc.ScorePush(value, cId); + } + mPostProc.ScorePop(topScore); + + ImageClassificationResults results; + results.number_of_classes = 0; + for (auto& value : topScore) { + LOGI("score: %.3f, threshold: %.3f", value.first, info.threshold); + LOGI("idx:%d", value.second); + LOGI("classProb: %.3f", value.first); + + results.indices.push_back(value.second); + results.confidences.push_back(value.first); + results.names.push_back(mUserListName[value.second]); + results.number_of_classes++; + } - float *prediction = reinterpret_cast(inferResults[0]); - for (int i = 0; i < count; ++i) { - value = prediction[i]; + *classificationResults = results; + LOGE("Inference: GetClassificationResults: %d\n", + results.number_of_classes); - // Only add it if it beats the threshold and has a chance at being in - // the top N. - top_result_pq.push(std::pair(value, i)); + } else { + tensor_t outputData; - // If at capacity, kick the smallest value out. - if (top_result_pq.size() > mOutputNumbers) { - top_result_pq.pop(); + // Get inference result and contain it to outputData. + int ret = FillOutputResult(outputData); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get output result."); + return ret; } - } - // Copy to output vector and reverse into descending order. - while (!top_result_pq.empty()) { - top_results.push_back(top_result_pq.top()); - top_result_pq.pop(); - } - std::reverse(top_results.begin(), top_results.end()); + // Will contain top N results in ascending order. 
+ std::vector > top_results; + std::priority_queue, + std::vector >, + std::greater > > + top_result_pq; + float value = 0.0f; + + std::vector > inferDimInfo(outputData.dimInfo); + std::vector inferResults(outputData.data.begin(), + outputData.data.end()); + + int count = inferDimInfo[0][1]; + LOGI("count: %d", count); + float *prediction = reinterpret_cast(inferResults[0]); + for (int i = 0; i < count; ++i) { + value = prediction[i]; + + // Only add it if it beats the threshold and has a chance at being in + // the top N. + top_result_pq.push(std::pair(value, i)); + + // If at capacity, kick the smallest value out. + if (top_result_pq.size() > mOutputNumbers) { + top_result_pq.pop(); + } + } - int classIdx = -1; - ImageClassificationResults results; - results.number_of_classes = 0; - for (int idx = 0; idx < top_results.size(); ++idx) { - if (top_results[idx].first < mThreshold) - continue; - LOGI("idx:%d", idx); - LOGI("classIdx: %d", top_results[idx].second); - LOGI("classProb: %f", top_results[idx].first); + // Copy to output vector and reverse into descending order. 
+ while (!top_result_pq.empty()) { + top_results.push_back(top_result_pq.top()); + top_result_pq.pop(); + } + std::reverse(top_results.begin(), top_results.end()); + + int classIdx = -1; + ImageClassificationResults results; + results.number_of_classes = 0; + for (int idx = 0; idx < top_results.size(); ++idx) { + if (top_results[idx].first < mThreshold) + continue; + LOGI("idx:%d", idx); + LOGI("classIdx: %d", top_results[idx].second); + LOGI("classProb: %f", top_results[idx].first); + + classIdx = top_results[idx].second; + results.indices.push_back(classIdx); + results.confidences.push_back(top_results[idx].first); + results.names.push_back(mUserListName[classIdx]); + results.number_of_classes++; + } - classIdx = top_results[idx].second; - results.indices.push_back(classIdx); - results.confidences.push_back(top_results[idx].first); - results.names.push_back(mUserListName[classIdx]); - results.number_of_classes++; + *classificationResults = results; + LOGE("Inference: GetClassificationResults: %d\n", + results.number_of_classes); } - *classificationResults = results; - LOGE("Inference: GetClassificationResults: %d\n", - results.number_of_classes); return MEDIA_VISION_ERROR_NONE; } diff --git a/mv_inference/inference/src/InputMetadata.cpp b/mv_inference/inference/src/InputMetadata.cpp new file mode 100644 index 0000000..4084328 --- /dev/null +++ b/mv_inference/inference/src/InputMetadata.cpp @@ -0,0 +1,234 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mv_private.h" + +#include +#include +#include +#include +#include +#include "InputMetadata.h" +#include + +namespace mediavision +{ +namespace inference +{ + mv_colorspace_e InputMetadata::ConvertTypeToMD(const std::string& type) + { + mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID; + if (type.empty()) { + LOGE("Invalid type[null]"); + return colorspace; + } + + if (type.compare("RGB888") == 0) { + colorspace = MEDIA_VISION_COLORSPACE_RGB888; + } else if (type.compare("Y800") == 0) { + colorspace = MEDIA_VISION_COLORSPACE_Y800; + } else { + LOGE("Not supported channel type"); + } + + return colorspace; + } + + int InputMetadata::GetTensorInfo(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "tensor_info") == false) { + LOGE("No tensor_info inputmetadata"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + // tensor_info + JsonArray * rootArray = json_object_get_array_member(root, "tensor_info"); + unsigned int elements = json_array_get_length(rootArray); + + std::map().swap(layer); + // TODO: handling error + // FIXEME: LayerInfo.set()?? + for (unsigned int elem = 0; elem < elements; ++elem) { + LayerInfo info; + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + info.name = + static_cast(json_object_get_string_member(pObject,"name")); + LOGI("layer: %s", info.name.c_str()); + + info.shapeType = + static_cast(json_object_get_int_member(pObject, "shape_type")); + LOGI("shape type: %d:%s", info.shapeType, info.shapeType == 0 ? "NCHW" : "NHWC"); + + info.dataType = + static_cast(json_object_get_int_member(pObject, "data_type")); + LOGI("data type : %d:%s", info.dataType, info.dataType == 0 ? 
"FLOAT32" : "UINT8"); + + const char *colorSpace = static_cast(json_object_get_string_member(pObject,"color_space")); + info.colorSpace = ConvertTypeToMD(std::string(colorSpace)); + LOGI("color space : %d:%s", info.colorSpace, info.colorSpace == MEDIA_VISION_COLORSPACE_RGB888 ? "RGB888" : ""); + + // dims + JsonArray * array = json_object_get_array_member(pObject, "shape_dims"); + unsigned int elements2 = json_array_get_length(array); + LOGI("shape dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + auto dim = static_cast(json_array_get_int_element(array, elem2)); + info.dims.push_back(dim); + LOGI("%d", dim); + } + + layer.insert(std::make_pair(info.name, info)); + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int InputMetadata::GetPreProcess(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "preprocess") == false) { + LOGE("No preprocess inputmetadata"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + // preprocess + JsonArray * rootArray = json_object_get_array_member(root, "preprocess"); + unsigned int elements = json_array_get_length(rootArray); + + std::map().swap(option); + // TODO: iterLayer should be the same with elements. 
+ auto iterLayer = layer.begin(); + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem, ++iterLayer) { + Options opt; + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + // normalization + if (json_object_has_member(pObject, "normalization")) { + JsonArray * array = json_object_get_array_member(pObject, "normalization"); + JsonNode * node = json_array_get_element(array, 0); + JsonObject * object = json_node_get_object(node); + + opt.normalization.use = true; + LOGI("use normalization"); + + JsonArray * arrayMean = json_object_get_array_member(object, "mean"); + JsonArray * arrayStd = json_object_get_array_member(object, "std"); + unsigned int elemMean = json_array_get_length(arrayMean); + unsigned int elemStd = json_array_get_length(arrayStd); + if (elemMean != elemStd) { + LOGE("Invalid mean and std values"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + for (unsigned int elem = 0; elem < elemMean; ++elem) { + auto m = static_cast(json_array_get_double_element(arrayMean, elem)); + auto s = static_cast(json_array_get_double_element(arrayStd, elem)); + opt.normalization.mean.push_back(m); + opt.normalization.std.push_back(s); + LOGI("%u: mean[%3.2f], std[%3.2f]", elem, m, s); + } + } + + if (json_object_has_member(pObject, "quantization")) { + JsonArray * array = json_object_get_array_member(pObject, "quantization"); + JsonNode * node = json_array_get_element(array, 0); + JsonObject * object = json_node_get_object(node); + + opt.quantization.use = true; + LOGI("use quantization"); + + JsonArray * arrayScale = json_object_get_array_member(object, "scale"); + JsonArray * arrayZero = json_object_get_array_member(object, "zeropoint"); + unsigned int elemScale = json_array_get_length(arrayScale); + unsigned int elemZero= json_array_get_length(arrayZero); + if (elemScale != elemZero) { + LOGE("Invalid scale and zero values"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; 
+				}
+
+				for (unsigned int elem = 0; elem < elemScale; ++elem) {
+					auto s = static_cast(json_array_get_double_element(arrayScale, elem));
+					auto z = static_cast(json_array_get_double_element(arrayZero, elem));
+					opt.quantization.scale.push_back(s);
+					opt.quantization.zeropoint.push_back(z);
+					LOGI("%u: scale[%3.2f], zeropoint[%3.2f]", elem, s, z);
+				}
+			}
+			option.insert(std::make_pair(iterLayer->first, opt));
+		}
+
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int InputMetadata::Parse(JsonObject *root)
+	{
+		LOGI("ENTER");
+
+		int ret = GetTensorInfo(root);
+		if (ret != MEDIA_VISION_ERROR_NONE) {
+			LOGE("Fail to GetTensorInfo[%d]", ret);
+			return ret;
+		}
+
+		ret = GetPreProcess(root);
+		if (ret != MEDIA_VISION_ERROR_NONE) {
+			LOGE("Fail to GetPreProcess[%d]", ret);
+			return ret;
+		}
+
+		parsed = true;
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int LayerInfo::GetWidth() const {
+		if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
+			return dims[3];
+		} else { // INFERENCE_TENSOR_SHAPE_NHWC
+			return dims[1];
+		}
+	}
+
+	int LayerInfo::GetHeight() const {
+		if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
+			return dims[2];
+		} else { // INFERENCE_TENSOR_SHAPE_NHWC
+			return dims[2];
+		}
+	}
+
+	int LayerInfo::GetChannel() const {
+		if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
+			return dims[1];
+		} else { // INFERENCE_TENSOR_SHAPE_NHWC
+			return dims[3];
+		}
+	}
+
+} /* Inference */
+} /* MediaVision */
diff --git a/mv_inference/inference/src/Metadata.cpp b/mv_inference/inference/src/Metadata.cpp
new file mode 100644
index 0000000..bb42557
--- /dev/null
+++ b/mv_inference/inference/src/Metadata.cpp
@@ -0,0 +1,121 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mv_private.h" +#include "Metadata.h" + +#include + +#include +#include +#include +#include +#include + +namespace mediavision +{ +namespace inference +{ + int Metadata::Init(const std::string& filename) + { + LOGI("ENTER"); + + if (access(filename.c_str(), F_OK | R_OK)) { + LOGE("meta file is in [%s] ", filename.c_str()); + return MEDIA_VISION_ERROR_INVALID_PATH; + } + + mMetafile = filename; + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int Metadata::Parse() + { + LOGI("ENTER"); + + if (mMetafile.empty()) { + LOGE("meta file is empty"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + GError *error = NULL; + JsonNode *node = NULL; + JsonObject *object = NULL; + int ret = MEDIA_VISION_ERROR_NONE; + + JsonParser *parser = json_parser_new(); + if (parser == NULL) { + LOGE("Fail to create json parser"); + return MEDIA_VISION_ERROR_OUT_OF_MEMORY; + } + + gboolean jsonRet = json_parser_load_from_file(parser, mMetafile.c_str(), &error); + if (!jsonRet) { + LOGE("Unable to parser file %s by %s", mMetafile.c_str(), + error == NULL ? "Unknown" : error->message); + g_error_free(error); + ret = MEDIA_VISION_ERROR_INVALID_DATA; + goto _ERROR_; + } + + node = json_parser_get_root(parser); + if (JSON_NODE_TYPE(node) != JSON_NODE_OBJECT) { + LOGE("Fail to json_parser_get_root. It's an incorrect markup"); + ret = MEDIA_VISION_ERROR_INVALID_DATA; + goto _ERROR_; + } + + object = json_node_get_object(node); + if (!object) { + LOGE("Fail to json_node_get_object. 
object is NULL"); + ret = MEDIA_VISION_ERROR_INVALID_DATA; + goto _ERROR_; + } + + ret = mInputMeta.Parse(json_object_get_object_member(object, "inputmetadata")); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to parse input Meta[%d]",ret); + goto _ERROR_; + } + + ret = mOutputMeta.Parse(json_object_get_object_member(object, "outputmetadata")); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to parse output meta[%d]",ret); + goto _ERROR_; + } + + _ERROR_ : + g_object_unref(parser); + parser = NULL; + LOGI("LEAVE"); + + return ret; + } + + const InputMetadata& Metadata::GetInputMeta() + { + return mInputMeta; + } + + const OutputMetadata& Metadata::GetOutputMeta() + { + return mOutputMeta; + } +} /* Inference */ +} /* MediaVision */ diff --git a/mv_inference/inference/src/OutputMetadata.cpp b/mv_inference/inference/src/OutputMetadata.cpp new file mode 100644 index 0000000..36fe1e7 --- /dev/null +++ b/mv_inference/inference/src/OutputMetadata.cpp @@ -0,0 +1,112 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mv_private.h" + +#include +#include +#include +#include +#include +#include "OutputMetadata.h" + +namespace mediavision +{ +namespace inference +{ + int OutputMetadata::GetScore(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "score") == false) { + LOGI("No score outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + // score + JsonArray * rootArray = json_object_get_array_member(root, "score"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + // FIXEME: ScoreInfo.set()?? + for (unsigned int elem = 0; elem < elements; ++elem) { + + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + score.name = + static_cast(json_object_get_string_member(pObject,"name")); + LOGI("layer: %s", score.name.c_str()); + + JsonArray * array = json_object_get_array_member(pObject, "index"); + unsigned int elements2 = json_array_get_length(array); + LOGI("range dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + auto index = static_cast(json_array_get_int_element(array, elem2)); + score.dimInfo.index.push_back(index); + LOGI("%d", index); + } + + score.topNumber = static_cast(json_object_get_int_member(pObject, "top_number")); + LOGI("top number: %d", score.topNumber); + + score.threshold = static_cast(json_object_get_double_member(pObject, "threshold")); + LOGI("threshold: %1.3f", score.threshold); + + score.type = static_cast(json_object_get_int_member(pObject, "score_type")); + LOGI("score type: %d", score.type); + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int OutputMetadata::Parse(JsonObject *root) + { + LOGI("ENTER"); + + int ret = GetScore(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetScore[%d]", ret); + return ret; + } + + parsed = true; + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int ScoreInfo::GetIndex() const + { 
+ LOGI("ENTER"); + + int ret = 0; + for (auto& index : dimInfo.index) { + if (index > 0) { + break; + } + ret++; + } + + LOGI("LEAVE"); + + return ret; + } +} /* Inference */ +} /* MediaVision */ diff --git a/mv_inference/inference/src/PostProcess.cpp b/mv_inference/inference/src/PostProcess.cpp new file mode 100644 index 0000000..a135097 --- /dev/null +++ b/mv_inference/inference/src/PostProcess.cpp @@ -0,0 +1,84 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mv_private.h" + +#include +#include +#include +#include +#include "PostProcess.h" + +namespace mediavision +{ +namespace inference +{ + float PostProcess::sigmoid(float value) + { + LOGI("ENTER"); + + LOGI("LEAVE"); + return 1.0/(1.0+ exp(-value)); + + } + + int PostProcess::ScoreClear(int size) + { + LOGI("ENTER"); + + std::priority_queue, + std::vector>, + std::greater>>().swap(mScore); + mMaxScoreSize = size; + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int PostProcess::ScorePush(float value, int index) + { + LOGI("ENTER"); + + mScore.push(std::pair(value, index)); + if (mScore.size() > mMaxScoreSize) { + mScore.pop(); + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int PostProcess::ScorePop(std::vector>& top) + { + LOGI("ENTER"); + + top.clear(); + while (mScore.empty() == false) { + top.push_back(mScore.top()); + LOGI("%.3f", mScore.top().first); + mScore.pop(); + } + + std::reverse(top.begin(), top.end()); + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } +} /* Inference */ +} /* MediaVision */ diff --git a/mv_inference/inference/src/PreProcess.cpp b/mv_inference/inference/src/PreProcess.cpp new file mode 100644 index 0000000..fa65ced --- /dev/null +++ b/mv_inference/inference/src/PreProcess.cpp @@ -0,0 +1,130 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mv_private.h" + +#include +#include +#include +#include +#include +#include "PreProcess.h" + +const int colorConvertTable[][12] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, -1, 0, 0, 0, 0, 0, 0, 0, cv::COLOR_GRAY2BGR565, cv::COLOR_GRAY2RGB, cv::COLOR_GRAY2RGBA }, + { 0, cv::COLOR_YUV2GRAY_I420, -1, 0, 0, 0, 0, 0, 0, 0, cv::COLOR_RGBA2GRAY, cv::COLOR_YUV2RGBA_I420 }, + { 0, cv::COLOR_YUV2GRAY_NV12, 0, -1, 0, 0, 0, 0, 0, 0, cv::COLOR_YUV2RGB_NV12, cv::COLOR_YUV2RGBA_NV12 }, + { 0, cv::COLOR_YUV2GRAY_YV12, 0, 0, -1, 0, 0, 0, 0, 0, cv::COLOR_YUV2RGB_YV12, cv::COLOR_YUV2RGBA_YV12 }, + { 0, cv::COLOR_YUV2GRAY_NV21, 0, 0, 0, -1, 0, 0, 0, 0, cv::COLOR_YUV2RGB_NV21, cv::COLOR_YUV2RGBA_NV21 }, + { 0, cv::COLOR_YUV2GRAY_YUYV, 0, 0, 0, 0, -1, 0, 0, 0, cv::COLOR_YUV2RGB_YUYV, cv::COLOR_YUV2RGBA_YUYV }, + { 0, cv::COLOR_YUV2GRAY_UYVY, 0, 0, 0, 0, 0, -1, 0, 0, cv::COLOR_YUV2BGR_UYVY, cv::COLOR_YUV2BGRA_UYVY }, + { 0, cv::COLOR_YUV2GRAY_Y422, 0, 0, 0, 0, 0, 0, -1, 0, cv::COLOR_YUV2RGB_Y422, cv::COLOR_YUV2RGBA_Y422 }, + { 0, cv::COLOR_BGR5652GRAY, 0, 0, 0, 0, 0, 0, 0, -1, cv::COLOR_BGR5652BGR, cv::COLOR_BGR5652BGRA }, + { 0, cv::COLOR_RGB2GRAY, 0, 0, 0, 0, 0, 0, 0, 0, -1, cv::COLOR_RGB2RGBA }, + { 0, cv::COLOR_RGBA2GRAY, 0, 0, 0, 0, 0, 0, 0, cv::COLOR_BGRA2BGR565, cv::COLOR_RGBA2RGB, -1} +}; + +namespace mediavision +{ +namespace inference +{ + int PreProcess::Resize(cv::Mat& source, cv::Mat& dest, cv::Size size) + { + LOGI("ENTER"); + + try { + cv::resize(source, dest, size); + } catch (cv::Exception& e) { + LOGE("Fail to resize with msg: %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + + int PreProcess::ColorConvert(cv::Mat& source, cv::Mat& dest, int sType, int dType) + { + LOGI("ENTER"); + + auto conversionColor = static_cast(colorConvertTable[sType][dType]); + if (conversionColor == -1) {/* Don't need conversion */ + dest = source; + } else if (conversionColor > 0) { + /* 
Converts the given image (as cv::Mat) to the target color space */
+			cv::cvtColor(source, dest, conversionColor);
+		} else {
+			LOGE("Fail to ColorConvert");
+			return MEDIA_VISION_ERROR_INVALID_OPERATION;
+		}
+
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int PreProcess::Normalize(cv::Mat& source, cv::Mat& dest,
+								const std::vector& mean, const std::vector& std)
+	{
+		LOGI("ENTER");
+		try {
+			cv::subtract(source, cv::Scalar(mean[0], mean[1], mean[2]), dest);
+			source = dest;
+			cv::divide(source, cv::Scalar(std[0], std[1], std[2]), dest);
+		} catch (cv::Exception& e) {
+			LOGE("Fail to substract/divide with msg: %s", e.what());
+			return MEDIA_VISION_ERROR_INVALID_OPERATION;
+		}
+
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int PreProcess::Run(cv::Mat& source, const int colorSpace,
+						const int dataType, const LayerInfo& layerInfo,
+						const Options& options, void* buffer)
+	{
+		LOGI("ENTER");
+
+		// dest is a wrapper of the buffer
+		cv::Mat dest(cv::Size(layerInfo.GetWidth(), layerInfo.GetHeight()),
+					dataType, buffer);
+
+		cv::Mat cvSource, cvDest;
+		// cvSource has new allocation with dest.size()
+		Resize(source, cvSource, dest.size());
+
+		// cvDest has a new allocation if its colorSpace is not RGB888;
+		// cvDest shares the data with cvSource if its colorSpace is RGB888
+		ColorConvert(cvSource, cvDest, colorSpace, layerInfo.colorSpace);
+
+		cvDest.convertTo(dest, dest.type());
+
+		if (options.normalization.use) {
+			Normalize(dest, dest, options.normalization.mean, options.normalization.std);
+		}
+
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+} /* Inference */
+} /* MediaVision */
diff --git a/mv_inference/inference/src/mv_inference_open.cpp b/mv_inference/inference/src/mv_inference_open.cpp
index c2011b4..1c4eb7e 100644
--- a/mv_inference/inference/src/mv_inference_open.cpp
+++ b/mv_inference/inference/src/mv_inference_open.cpp
@@ -127,6 +127,9 @@ int mv_inference_configure_model_open(mv_inference_h infer, int
backendType = 0;
 	size_t userFileLength = 0;
 
+	// TODO: a temporary variable; later, it should be removed.
+	std::string metaFilePath;
+
 	ret = mv_engine_config_get_string_attribute(
 			engine_config, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH,
 			&modelConfigFilePath);
@@ -195,6 +198,16 @@ int mv_inference_configure_model_open(mv_inference_h infer,
 	pInfer->ConfigureModelFiles(std::string(modelConfigFilePath),
 								std::string(modelWeightFilePath),
 								std::string(modelUserFilePath));
+	/* FIXME
+	 * temporary code lines to get a metafile, which has the same name
+	 * as modelWeightFilePath except the extension.
+	 * Later, it should get a metafile name and the below lines should be
+	 * removed.
+	 */
+	metaFilePath = std::string(modelWeightFilePath).substr(0,
+			std::string(modelWeightFilePath).find_last_of('.')) + ".json";
+	LOGI("metaFilePath: %s", metaFilePath.c_str());
+	pInfer->ParseMetadata(metaFilePath);
 
 _ERROR_:
 	if (modelConfigFilePath)
diff --git a/packaging/capi-media-vision.spec b/packaging/capi-media-vision.spec
index 03b9a3f..1d72b2d 100644
--- a/packaging/capi-media-vision.spec
+++ b/packaging/capi-media-vision.spec
@@ -1,7 +1,7 @@
 Name:        capi-media-vision
 Summary:     Media Vision library for Tizen Native API
 Version:     0.7.0
-Release:     2
+Release:     3
 Group:       Multimedia/Framework
 License:     Apache-2.0 and BSD-3-Clause
 Source0:     %{name}-%{version}.tar.gz
-- 
2.7.4