From: Inki Dae
Date: Wed, 12 Feb 2020 02:05:33 +0000 (+0900)
Subject: vision: Drop all OpenCV dependencies
X-Git-Tag: submit/tizen/20200423.063253~60
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3f263657544e3bb4a955f37ad92d53ed427ad987;p=platform%2Fcore%2Fmultimedia%2Finference-engine-interface.git

vision: Drop all OpenCV dependencies

This patch drops OpenCV-dependent code. The dropped code will be moved
to the Inference layer. This is a step toward removing the
inference-engine-vision layer.

Change-Id: I3fc29071ec7f7a0e4b0dec72ed7568bcc638462e
Signed-off-by: Inki Dae
---

diff --git a/include/inference_engine_vision_impl.h b/include/inference_engine_vision_impl.h
index ff327ae..c8d0e40 100755
--- a/include/inference_engine_vision_impl.h
+++ b/include/inference_engine_vision_impl.h
@@ -22,38 +22,9 @@
 #include "inference_engine_common_impl.h"
 #include "inference_engine_common.h"
 #include "inference_engine_type.h"
-#include
-#include
-
 using namespace InferenceEngineInterface::Common;
 
-typedef struct _ImageClassficationResults {
-    int number_of_classes;
-    std::vector<int> indices;
-    std::vector<std::string> names;
-    std::vector<float> confidences;
-} ImageClassificationResults; /**< structure ImageClassificationResults */
-
-typedef struct _ObjectDetectionResults {
-    int number_of_objects;
-    std::vector<int> indices;
-    std::vector<std::string> names;
-    std::vector<float> confidences;
-    std::vector<cv::Rect> locations;
-} ObjectDetectionResults; /**< structure ObjectDetectionResults */
-
-typedef struct _FaceDetectionResults {
-    int number_of_faces;
-    std::vector<float> confidences;
-    std::vector<cv::Rect> locations;
-} FaceDetectionResults; /**< structure FaceDetectionResults */
-
-typedef struct _FacialLandMarkDetectionResults {
-    int number_of_landmarks;
-    std::vector<cv::Point> locations;
-} FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */
-
 namespace InferenceEngineInterface {
 namespace Vision {
@@ -75,20 +46,6 @@ public:
      */
     int SetInputTensorParamNode(std::string node);
 
-    /**
-     * @brief Set an input image's information. Deprecated.
-     *
-     * @since_tizen 5.5
-     */
-    int SetInputTensorParamInput(int width, int height, int dim, int ch);
-
-    /**
-     * @brief Set mean and deviation values. Deprecated.
-     *
-     * @since_tizen 5.5
-     */
-    int SetInputTensorParamNorm(double deviation, double mean);
-
     /**
      * @brief Set output nodes' names. Deprecated.
      *
@@ -96,19 +53,7 @@ public:
      */
     int SetOutputTensorParamNodes(std::vector<std::string> nodes);
 
-    /**
-     * @brief Set threshold value. Deprecated.
-     *
-     * @since_tizen 5.5
-     */
-    int SetOutputTensorParamThresHold(double threshold);
-
-    /**
-     * @brief Set the number of outputs. Deprecated.
-     *
-     * @since_tizen 5.5
-     */
-    int SetOutputTensorParamNumbers(int number);
+    int SetInputDataBuffer(tensor_t data);
 
     /**
      * @brief Get an input layer property information from a given backend engine.
@@ -124,6 +69,10 @@
      */
     int GetOutputTensorProperty(inference_engine_layer_property *property);
 
+    void *GetInputDataPtr();
+
+    int GetInputLayerAttrType();
+
     /**
     * @brief Set an input layer property information to a given backend engine.
     *
@@ -165,7 +114,7 @@ public:
      *
      * @since_tizen 5.5
      */
-    int Run(cv::Mat tensor);
+    int Run();
 
     /**
     * @brief Run an inference with user-given input and output buffers.
@@ -176,72 +125,15 @@ public:
            std::vector &output_buffers);
 
     /**
-     * @brief Get inference results of image classification.
-     *
-     * @since_tizen 5.5
-     */
-    int GetInferenceResult(ImageClassificationResults& results);
-
-    /**
-     * @brief Get inference results of object detection.
+     * @brief Get inference results from a backend engine.
      *
-     * @since_tizen 5.5
-     */
-    int GetInferenceResult(ObjectDetectionResults& results);
-
-    /**
-     * @brief Get inference results of face detection.
-     *
-     * @since_tizen 5.5
-     */
-    int GetInferenceResult(FaceDetectionResults& results);
-
-    /**
-     * @brief Get inference results of facial landmark detection.
-     *
-     * @since_tizen 5.5
+     * @since_tizen 6.0
      */
-    int GetInferenceResult(FacialLandMarkDetectionResults& results);
+    int GetInferenceResult(tensor_t& results);
 
-    /**
-     * @brief Set SetUserFile.
-     * @details Image Classification and Object Detection needs a user file
-     *          including labels of classification and objects.
-     * @since_tizen 5.5
-     */
-    int SetUserFile(std::string filename);
 
     int SetLibraryPath(std::string path);
 
-protected:
-    /**
-     * @brief Set an input with a type of cv::Mat
-     * @details Set an input and pre-process the input
-     *
-     * @since_tizen 5.5
-     */
-    int SetInput(cv::Mat cvImg);
-
-    /**
-     * @brief Set user list name.
-     *
-     * @since_tizen 5.5
-     */
-    void SetUserListName(std::string userlist);
-
 private:
     std::string mBackendLibName;
-    std::vector<std::string> mUserListName;
-
-    int mCh;
-    int mDim;
-    cv::Size mInputSize;
-
-    double mDeviation;
-    double mMean;
-    double mThreshold;
-    int mOutputNumbers;
-    cv::Size mSourceSize;
-
-    cv::Mat mInputBuffer;
-    int mMatType;
 
     InferenceEngineCommon *mCommonEngine; /**< InferenceEngineCommon is used to do typical process */

diff --git a/vision/inference_engine_vision_impl.cpp b/vision/inference_engine_vision_impl.cpp
index b6100c6..503df22 100755
--- a/vision/inference_engine_vision_impl.cpp
+++ b/vision/inference_engine_vision_impl.cpp
@@ -36,29 +36,9 @@ extern "C" {
 #define LOG_TAG "INFERENCE_ENGINE_VISION"
 }
 
-typedef enum {
-    InputAttrNoType = 0,
-    InputAttrFloat32 = 1,
-    InputAttrInt32 = 2,
-    InputAttrUInt8 = 3,
-    InputAttrInt64 = 4,
-    InputAttrString = 5,
-    InputAttrBool = 6,
-} InputAttrType;
-
 namespace InferenceEngineInterface {
 namespace Vision {
 
 InferenceEngineVision::InferenceEngineVision(inference_engine_config *config) :
-    mCh(0),
-    mDim(0),
-    mInputSize(cv::Size()),
-    mDeviation(0.0),
-    mMean(0.0),
-    mThreshold(0.0),
-    mOutputNumbers(0),
-    mSourceSize(cv::Size()),
-    mInputBuffer(cv::Mat()),
-    mMatType(0),
     mCommonEngine(nullptr)
 {
     LOGE("ENTER");
@@ -69,16 +49,6 @@
 }
 
 InferenceEngineVision::InferenceEngineVision(std::string backend) :
-    mCh(0),
-    mDim(0),
-    mInputSize(cv::Size()),
-    mDeviation(0.0),
-    mMean(0.0),
-    mThreshold(0.0),
-    mOutputNumbers(0),
-    mSourceSize(cv::Size()),
-    mInputBuffer(cv::Mat()),
-    mMatType(0),
     mCommonEngine(nullptr)
 {
     LOGE("ENTER");
@@ -118,51 +88,6 @@ void InferenceEngineVision::UnbindBackend(void)
     LOGI("LEAVE");
 }
 
-int InferenceEngineVision::SetUserFile(std::string filename)
-{
-    std::ifstream fp(filename.c_str());
-    if (!fp.is_open()) {
-        return INFERENCE_ENGINE_ERROR_INVALID_PATH;
-    }
-
-    std::string userListName;
-    while (!fp.eof()) {
-        std::getline(fp, userListName);
-        if (userListName.length())
-            SetUserListName(userListName);
-    }
-
-    fp.close();
-
-    return INFERENCE_ENGINE_ERROR_NONE;
-}
-
-void InferenceEngineVision::SetUserListName(std::string userlist)
-{
-    mUserListName.push_back(userlist);
-}
-
-int InferenceEngineVision::SetInputTensorParamInput(int width, int height, int dim, int ch)
-{
-    mCh = ch;
-    mDim = dim;
-    mInputSize = cv::Size(width, height);
-
-    LOGI("InputSize is %d x %d\n", mInputSize.width, mInputSize.height);
-
-    return INFERENCE_ENGINE_ERROR_NONE;
-}
-
-int InferenceEngineVision::SetInputTensorParamNorm(double deviation, double mean)
-{
-    mDeviation = deviation;
-    mMean = mean;
-
-    LOGI("mean %.4f, deviation %.4f", mMean, mDeviation);
-
-    return INFERENCE_ENGINE_ERROR_NONE;
-}
-
 int InferenceEngineVision::SetInputTensorParamNode(std::string node)
 {
     LOGE("ENTER");
@@ -183,6 +108,11 @@ int InferenceEngineVision::GetInputTensorProperty(inference_engine_layer_propert
     return ret;
 }
 
+void *InferenceEngineVision::GetInputDataPtr()
+{
+    return mCommonEngine->GetInputDataPtr();
+}
+
 int InferenceEngineVision::GetOutputTensorProperty(inference_engine_layer_property *property)
 {
     LOGE("ENTER");
@@ -193,6 +123,11 @@ int InferenceEngineVision::GetOutputTensorProperty(inference_engine_layer_proper
     return ret;
 }
 
+int InferenceEngineVision::SetInputDataBuffer(tensor_t data)
+{
+    return mCommonEngine->SetInputDataBuffer(data);
+}
+
 int InferenceEngineVision::SetInputTensorProperty(inference_engine_layer_property &property)
 {
     LOGE("ENTER");
@@ -222,24 +157,6 @@ int InferenceEngineVision::SetTargetDevices(int types)
     return ret;
 }
 
-int InferenceEngineVision::SetOutputTensorParamThresHold(double threshold)
-{
-    mThreshold = threshold;
-
-    LOGI("threshold %.4f", mThreshold);
-
-    return INFERENCE_ENGINE_ERROR_NONE;
-}
-
-int InferenceEngineVision::SetOutputTensorParamNumbers(int numbers)
-{
-    mOutputNumbers = numbers;
-
-    LOGI("outputNumber %d", mOutputNumbers);
-
-    return INFERENCE_ENGINE_ERROR_NONE;
-}
-
 int InferenceEngineVision::SetOutputTensorParamNodes(std::vector<std::string> nodes)
 {
     LOGI("ENTER");
@@ -254,89 +171,23 @@ int InferenceEngineVision::Load(std::vector<std::string> model_paths, inference_
 {
     LOGI("ENTER");
 
-    std::string label_file;
-    if (model_format == INFERENCE_MODEL_TFLITE || model_format == INFERENCE_MODEL_TORCH) {
-        label_file = model_paths[1];
-    } else {
-        label_file = model_paths[2];
-    }
-
-    size_t userFileLength = label_file.length();
-    if (userFileLength > 0 && access(label_file.c_str(), F_OK)) {
-        LOGE("Label file path in [%s] ", label_file.c_str());
-        return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
-    }
-
-    int ret = (userFileLength > 0) ? SetUserFile(label_file) : INFERENCE_ENGINE_ERROR_NONE;
-    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-        LOGE("Fail to load label file.");
-        return ret;
-    }
-
     // Load model files.
-    ret = mCommonEngine->Load(model_paths, model_format);
+    int ret = mCommonEngine->Load(model_paths, model_format);
     if (ret != INFERENCE_ENGINE_ERROR_NONE) {
         LOGE("Fail to load InferenceEngineVision");
         return ret;
     }
 
-    //get type and allocate memory to mInputBuffer;
-    InputAttrType attrType = static_cast<InputAttrType>(mCommonEngine->GetInputLayerAttrType());
-    if (attrType == InputAttrUInt8) {
-        LOGI("InputType is %d ch with UINT8", mCh);
-        if (mCh == 1) {
-            mMatType = CV_8UC1;
-        } else if (mCh == 3) {
-            mMatType = CV_8UC3;
-        } else {
-            LOGE("Not supported");
-            return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED_FORMAT;
-        }
-    }
-    else if (attrType == InputAttrFloat32) {
-        LOGI("InputType is %d ch with FLOAT32", mCh);
-        if (mCh == 1) {
-            mMatType = CV_32FC1;
-        } else if (mCh == 3) {
-            mMatType = CV_32FC3;
-        } else {
-            LOGE("Not supported");
-            return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED_FORMAT;
-        }
-    }
-    else {
-        LOGE("Not supported");
-        ret = INFERENCE_ENGINE_ERROR_NOT_SUPPORTED_FORMAT;
-        return ret;
-    }
-
-    tensor_t inputData;
-    std::vector<int> info{1, mMatType, mInputSize.height, mInputSize.width};
-    inputData.dimInfo.push_back(info);
-
-    // some plug-in (opencv) doesn't allocate memory for input while loading models
-    // But, others (tflite) allcate memory while loading.
-    // Thus, the SetInputData() will be implemented in plug-in such as OpenCV, but
-    // just leave empty in plug-in such as tflite.
-    ret = mCommonEngine->SetInputDataBuffer(inputData);
-    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-        LOGE("Fail to SetInputData");
-        return ret;
-    }
-
-    void *dataPtr = mCommonEngine->GetInputDataPtr();
-    if (dataPtr == nullptr) {
-        LOGE("input data address is null");
-        return INFERENCE_ENGINE_ERROR_INTERNAL;
-    }
-
-    mInputBuffer = cv::Mat(mInputSize.height, mInputSize.width, mMatType, dataPtr);
-
     LOGI("LEAVE");
 
     return ret;
 }
 
+int InferenceEngineVision::GetInputLayerAttrType()
+{
+    return mCommonEngine->GetInputLayerAttrType();
+}
+
 int InferenceEngineVision::GetBackendCapacity(inference_engine_capacity *capacity)
 {
     LOGI("ENTER");
@@ -352,58 +203,11 @@ int InferenceEngineVision::GetBackendCapacity(inference_engine_capacit
     return ret;
 }
 
-int InferenceEngineVision::SetInput(cv::Mat cvImg)
-{
-    mSourceSize = cvImg.size();
-    int width = mInputSize.width;
-    int height = mInputSize.height;
-
-    //PreProcess();
-    cv::Mat sample;
-    if (cvImg.channels() == 3 && mCh == 1)
-        cv::cvtColor(cvImg, sample, cv::COLOR_BGR2GRAY);
-    else
-        sample = cvImg;
-
-    // size
-    cv::Mat sampleResized;
-    if (sample.size() != cv::Size(width, height))
-        cv::resize(sample, sampleResized, cv::Size(width, height));
-    else
-        sampleResized = sample;
-
-    // type
-    cv::Mat sampleFloat;
-    if (mCh == 3)
-        sampleResized.convertTo(sampleFloat, CV_32FC3);
-    else
-        sampleResized.convertTo(sampleFloat, CV_32FC1);
-
-    // normalize
-    cv::Mat sampleNormalized;
-    cv::Mat meanMat;
-    if (mCh == 3)
-        meanMat = cv::Mat(sampleFloat.size(), CV_32FC3, cv::Scalar((float)mMean, (float)mMean, (float)mMean));
-    else
-        meanMat = cv::Mat(sampleFloat.size(), CV_32FC1, cv::Scalar((float)mMean));
-
-    cv::subtract(sampleFloat, meanMat, sampleNormalized);
-
-    sampleNormalized /= (float)mDeviation;
-
-    sampleNormalized.convertTo(mInputBuffer, mMatType);
-
-    return INFERENCE_ENGINE_ERROR_NONE;
-}
-
-int InferenceEngineVision::Run(cv::Mat tensor)
+int InferenceEngineVision::Run()
 {
     LOGI("ENTER");
 
-    int ret = SetInput(tensor);
-    if (ret != INFERENCE_ENGINE_ERROR_NONE)
-        LOGE("Fail to SetInput InferenceEngineVision");
-    ret = mCommonEngine->Run();
+    int ret = mCommonEngine->Run();
     if (ret != INFERENCE_ENGINE_ERROR_NONE)
         LOGE("Fail to run InferenceEngineVision");
 
@@ -423,200 +227,9 @@ int InferenceEngineVision::Run(std::vector &inpu
     return ret;
 }
 
-int InferenceEngineVision::GetInferenceResult(ImageClassificationResults& results)
-{
-    LOGI("ENTER");
-
-    // Will contain top N results in ascending order.
-    std::vector<std::pair<float, int>> top_results;
-    std::priority_queue<std::pair<float, int>,
-        std::vector<std::pair<float, int>>,
-        std::greater<std::pair<float, int>>> top_result_pq;
-    float value;
-
-    tensor_t outputData;
-    int ret = mCommonEngine->GetInferenceResult(outputData);
-    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-        LOGE("fail to GetInferenceResults with ImageClassificationResults");
-        return ret;
-    }
-
-    std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
-    std::vector<void *> inferResults(outputData.data.begin(), outputData.data.end());
-
-    long count = inferDimInfo[0][1];
-    LOGI("count: %ld", count);
-
-    float *prediction = reinterpret_cast<float *>(inferResults[0]);
-    for (int i = 0; i < count; ++i) {
-        value = prediction[i];
-        // Only add it if it beats the threshold and has a chance at being in
-        // the top N.
-        top_result_pq.push(std::pair<float, int>(value, i));
-
-        // If at capacity, kick the smallest value out.
-        if (top_result_pq.size() > mOutputNumbers) {
-            top_result_pq.pop();
-        }
-    }
-
-    // Copy to output vector and reverse into descending order.
-    while (!top_result_pq.empty()) {
-        top_results.push_back(top_result_pq.top());
-        top_result_pq.pop();
-    }
-    std::reverse(top_results.begin(), top_results.end());
-
-    int classIdx = -1;
-    results.number_of_classes = 0;
-    for (int idx = 0; idx < top_results.size(); ++idx) {
-        if (top_results[idx].first < mThreshold)
-            continue;
-        LOGI("idx:%d", idx);
-        LOGI("classIdx: %d", top_results[idx].second);
-        LOGI("classProb: %f", top_results[idx].first);
-
-        classIdx = top_results[idx].second;
-        results.indices.push_back(classIdx);
-        results.confidences.push_back(top_results[idx].first);
-        results.names.push_back(mUserListName[classIdx]);
-        results.number_of_classes++;
-    }
-
-    LOGI("LEAVE");
-
-    return ret;
-}
-
-int InferenceEngineVision::GetInferenceResult(ObjectDetectionResults& results)
+int InferenceEngineVision::GetInferenceResult(tensor_t &results)
 {
-    tensor_t outputData;
-    int ret = mCommonEngine->GetInferenceResult(outputData);
-    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-        LOGE("fail to GetInferenceResults with ObjectDetectionResults");
-        return ret;
-    }
-
-    std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
-    std::vector<void *> inferResults(outputData.data.begin(), outputData.data.end());
-
-    float* boxes = reinterpret_cast<float *>(inferResults[0]);
-    float* classes = reinterpret_cast<float *>(inferResults[1]);
-    float* scores = reinterpret_cast<float *>(inferResults[2]);
-    int number_of_detections = (int)(*reinterpret_cast<float *>(inferResults[3]));
-
-    int left, top, right, bottom;
-    cv::Rect loc;
-
-    results.number_of_objects = 0;
-    for (int idx = 0; idx < number_of_detections; ++idx) {
-        if (scores[idx] < mThreshold)
-            continue;
-
-        left = (int)(boxes[idx*4 + 1] * mSourceSize.width);
-        top = (int)(boxes[idx*4 + 0] * mSourceSize.height);
-        right = (int)(boxes[idx*4 + 3] * mSourceSize.width);
-        bottom = (int)(boxes[idx*4 + 2] * mSourceSize.height);
-
-        loc.x = left;
-        loc.y = top;
-        loc.width = right -left + 1;
-        loc.height = bottom - top + 1;
-
-        results.indices.push_back((int)classes[idx]);
-        results.confidences.push_back(scores[idx]);
-        results.names.push_back(mUserListName[(int)classes[idx]]);
-        results.locations.push_back(loc);
-        results.number_of_objects++;
-
-        LOGI("objectClass: %d", (int)classes[idx]);
-        LOGI("confidence:%f", scores[idx]);
-        LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom);
-    }
-
-    return ret;
-}
-
-int InferenceEngineVision::GetInferenceResult(FaceDetectionResults& results)
-{
-    tensor_t outputData;
-    int ret = mCommonEngine->GetInferenceResult(outputData);
-    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-        LOGE("fail to GetInferenceResults with FaceDetectionResults");
-        return ret;
-    }
-
-    std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
-    std::vector<void *> inferResults(outputData.data.begin(), outputData.data.end());
-
-    float* boxes = reinterpret_cast<float *>(inferResults[0]);
-    float* classes = reinterpret_cast<float *>(inferResults[1]);
-    float* scores = reinterpret_cast<float *>(inferResults[2]);
-
-    int number_of_detections = (int)(*reinterpret_cast<float *>(inferResults[3]));
-    int left, top, right, bottom;
-    cv::Rect loc;
-
-    results.number_of_faces = 0;
-    for (int idx = 0; idx < number_of_detections; ++idx) {
-        if (scores[idx] < mThreshold)
-            continue;
-
-        left = (int)(boxes[idx*4 + 1] * mSourceSize.width);
-        top = (int)(boxes[idx*4 + 0] * mSourceSize.height);
-        right = (int)(boxes[idx*4 + 3] * mSourceSize.width);
-        bottom = (int)(boxes[idx*4 + 2] * mSourceSize.height);
-
-        loc.x = left;
-        loc.y = top;
-        loc.width = right -left + 1;
-        loc.height = bottom - top + 1;
-
-        results.confidences.push_back(scores[idx]);
-        results.locations.push_back(loc);
-        results.number_of_faces++;
-
-        LOGI("confidence:%f", scores[idx]);
-        LOGI("class: %f", classes[idx]);
-        LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx*4 + 1], boxes[idx*4 + 0], boxes[idx*4 + 3], boxes[idx*4 + 2]);
-        LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, bottom);
-    }
-
-    return ret;
-}
-
-int InferenceEngineVision::GetInferenceResult(FacialLandMarkDetectionResults& results)
-{
-
-    tensor_t outputData;
-    int ret = mCommonEngine->GetInferenceResult(outputData);
-    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-        LOGE("fail to GetInferenceResults with FacialLandMarkDetectionResults");
-        return ret;
-    }
-
-    std::vector<std::vector<int>> inferDimInfo(outputData.dimInfo);
-    std::vector<void *> inferResults(outputData.data.begin(), outputData.data.end());
-
-    long number_of_detections = inferDimInfo[0][1];
-    float* loc = reinterpret_cast<float *>(inferResults[0]);
-
-    results.number_of_landmarks = 0;
-
-    cv::Point point(0,0);
-    results.number_of_landmarks = 0;
-    LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height);
-    for (int idx = 0; idx < number_of_detections; idx+=2) {
-        point.x = (int)(loc[idx] * mSourceSize.width);
-        point.y = (int)(loc[idx+1] * mSourceSize.height);
-
-        results.locations.push_back(point);
-        results.number_of_landmarks++;
-
-        LOGI("x:%d, y:%d", point.x, point.y);
-    }
-
-    return ret;
+    return mCommonEngine->GetInferenceResult(results);
 }
 
 int InferenceEngineVision::SetLibraryPath(std::string path)
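
Note (illustration only, not part of the patch): with Run(cv::Mat), SetInput() and the typed GetInferenceResult() overloads removed, the vision layer only forwards raw tensors, so preprocessing and result decoding become the caller's job (the Inference layer mentioned in the commit message). The sketch below shows one way a caller could drive the API that remains after this patch. Everything outside the patched methods is an assumption for the example: the RunClassification() helper name, the 224x224 BGR input, the normalization constants, and the dimInfo layout, which simply mirrors the {1, type, height, width} vector the dropped Load() code used to build.

// Minimal caller-side sketch, assuming a 224x224 float input and an 8-bit BGR frame.
// Only the InferenceEngineVision methods added or kept by this patch are real API.
#include <opencv2/imgproc.hpp>
#include "inference_engine_vision_impl.h"

using namespace InferenceEngineInterface::Vision;

int RunClassification(InferenceEngineVision &engine, const cv::Mat &frame)
{
    const int width = 224, height = 224;   // assumed model input size

    // Describe the input tensor; mirrors the dimInfo the dropped Load() code built.
    tensor_t input;
    input.dimInfo.push_back(std::vector<int>{ 1, CV_32FC3, height, width });

    // Hand the descriptor to the backend; some backends allocate the input
    // buffer here, others already allocated it at Load() time.
    int ret = engine.SetInputDataBuffer(input);
    if (ret != INFERENCE_ENGINE_ERROR_NONE)
        return ret;

    void *dst = engine.GetInputDataPtr();
    if (dst == nullptr)
        return INFERENCE_ENGINE_ERROR_INTERNAL;

    // Preprocess with OpenCV in the caller: resize, convert and normalize
    // straight into the engine-owned buffer (this replaces the removed SetInput()).
    cv::Mat blob(height, width, CV_32FC3, dst), resized;
    cv::resize(frame, resized, cv::Size(width, height));
    resized.convertTo(blob, CV_32FC3, 1.0 / 127.5, -1.0);   // assumed mean/deviation

    ret = engine.Run();
    if (ret != INFERENCE_ENGINE_ERROR_NONE)
        return ret;

    // Raw output tensors come back as-is; thresholding, label lookup and
    // box/landmark decoding now live in the caller.
    tensor_t output;
    return engine.GetInferenceResult(output);
}

The detection paths follow the same pattern: the caller reads the raw boxes/scores tensors out of GetInferenceResult(tensor_t&) and applies its own threshold and label list, which is what the removed ObjectDetectionResults/FaceDetectionResults code did inside the vision layer.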