mv_inference: Pass input tensor buffers and consider multiple tensors
author    Inki Dae <inki.dae@samsung.com>
Thu, 13 Feb 2020 07:37:30 +0000 (16:37 +0900)
committer Inki Dae <inki.dae@samsung.com>
Tue, 14 Apr 2020 00:40:31 +0000 (09:40 +0900)
This patch passes the input tensor buffers allocated by the Inference layer
to the backend engine for inference, and prepares the code for multiple
input tensors. To do so, it switches to the new backend callbacks and drops
the old ones.

Change-Id: I4156c88bd5f18b0ba36b186ca08e3d4c04ceada3
Signed-off-by: Inki Dae <inki.dae@samsung.com>
mv_inference/inference/include/Inference.h
mv_inference/inference/src/Inference.cpp
mv_inference/inference/src/mv_inference_open.cpp
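
For reference, a minimal sketch of the new calling convention from the open layer
(mirroring the mv_inference_open.cpp hunks below; source, roi and pInfer are the
local names used there):

    std::vector<mv_source_h> sources;
    std::vector<mv_rectangle_s> rects;

    sources.push_back(source);         // only a single source is accepted for now
    if (roi != NULL)
            rects.push_back(*roi);     // an empty rects vector means the full frame

    ret = pInfer->Run(sources, rects);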

index 177a5d9ec2ca86fccea2a603e043bd686ec01f7a..f20af31638e016a5e688093d2f04a5f660137062 100755 (executable)
@@ -232,16 +232,16 @@ public:
        int Load();
 
        /**
-        * @brief       Runs inference with the roi of a given image
+        * @brief       Runs inference with a region of a given image
         * @details Use this function to run forward pass with the given image.
-        *          The given image is preprocessed and the roi of the image is
+        *          The given image is preprocessed and the region of the image is
         *          thrown to neural network. Then, the output tensor is returned.
         *          If roi is NULL, then full source will be analyzed.
         *
         * @since_tizen 5.5
         * @return @c true on success, otherwise a negative error value
         */
-       int Run(mv_source_h mvSource, mv_rectangle_s *roi = NULL);
+       int Run(std::vector<mv_source_h> &mvSources, std::vector<mv_rectangle_s> &rects);
 
        /**
         * @brief       Gets that given engine is supported or not
@@ -312,11 +312,14 @@ private:
        std::map<std::string, int> mModelFormats;
     std::vector<std::string> mUserListName;
 
+       std::vector<inference_engine_tensor_buffer> mInputTensorBuffers;
+       inference_engine_layer_property mInputLayerProperty;
+
 private:
        void CheckSupportedInferenceBackend();
        int ConvertEngineErrorToVisionError(int error);
        int ConvertTargetTypes(int given_types);
-       int DoPreprocess(cv::Mat cvImg);
+       int Preprocess(cv::Mat cvImg, cv::Mat cvDst);
        int SetUserFile(std::string filename);
 };
 
index b3273c7ad7e5a291107cee1348e054b2a505ba35..5f3c5d1d00aa39d1a3640b6bfb1443c8f68ff3f8 100755 (executable)
@@ -78,6 +78,9 @@ Inference::Inference() :
 {
        LOGI("ENTER");
 
+       mInputTensorBuffers.clear();
+       mInputLayerProperty.tensor_infos.clear();
+
        mBackend = NULL;
        mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_OPENCV, std::make_pair("opencv", false)));
        mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_TFLITE, std::make_pair("tflite", false)));
@@ -103,6 +106,8 @@ Inference::Inference() :
 
 Inference::~Inference()
 {
+       std::vector<inference_engine_tensor_buffer>().swap(mInputTensorBuffers);
+       std::vector<inference_engine_tensor_info>().swap(mInputLayerProperty.tensor_infos);
        mModelFormats.clear();
 
        // Release backend engine.
@@ -199,7 +204,7 @@ int Inference::ConvertTargetTypes(int given_types)
        return target_types;
 }
 
-int Inference::DoPreprocess(cv::Mat cvImg)
+int Inference::Preprocess(cv::Mat cvImg, cv::Mat cvDst)
 {
     mSourceSize = cvImg.size();
     int width = mInputSize.width;
@@ -237,7 +242,7 @@ int Inference::DoPreprocess(cv::Mat cvImg)
 
     sampleNormalized /= (float)mDeviation;
 
-    sampleNormalized.convertTo(mInputBuffer, mMatType);
+    sampleNormalized.convertTo(cvDst, mMatType);
 
     return MEDIA_VISION_ERROR_NONE;
 }
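
Note that the destination Mat is now supplied by the caller, so the normalized data
lands directly in the backend's tensor buffer. A minimal sketch of the idea (buffer
size and type are illustrative, not taken from the patch):

    #include <opencv2/core.hpp>
    #include <vector>

    // Wrap a preallocated tensor buffer with cv::Mat so convertTo() writes into
    // it without an extra copy. convertTo() only reuses the destination when its
    // size and type already match; otherwise it reallocates and silently detaches
    // from the external buffer, so the wrapper must be created with the final
    // mMatType and the network input size.
    std::vector<float> buf(224 * 224 * 3);        // hypothetical FLOAT32 input tensor
    cv::Mat dst(224, 224, CV_32FC3, buf.data());  // wraps the buffer, no ownership
    cv::Mat normalized(224, 224, CV_32FC3, cv::Scalar(0.5f, 0.5f, 0.5f));
    normalized.convertTo(dst, CV_32FC3);          // fills buf in place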
@@ -390,16 +395,12 @@ int Inference::Prepare(void)
        mMean = mConfig.mMeanValue;
        LOGI("mean %.4f, deviation %.4f", mMean, mDeviation);
 
-       mBackend->SetInputTensorParamNode(mConfig.mInputNodeName);
-
        mOutputNumbers = mConfig.mMaxOutputNumbers;
        LOGI("outputNumber %d", mOutputNumbers);
 
        mThreshold = mConfig.mConfidenceThresHold;
        LOGI("threshold %.4f", mThreshold);
 
-       mBackend->SetOutputTensorParamNodes(mConfig.mOutputNodeNames);
-
        // Check if backend supports a given target device/devices or not.
        if (mConfig.mTargetTypes & MV_INFERENCE_TARGET_CPU) {
                if (!(mBackendCapacity.supported_accel_devices & INFERENCE_TARGET_CPU)) {
@@ -488,71 +489,50 @@ int Inference::Load(void)
                return ConvertEngineErrorToVisionError(ret);
        }
 
-       tensor_t inputData;
-       std::vector<int> info{1, mCh, mInputSize.height, mInputSize.width};
-       inputData.dimInfo.push_back(info);
+       std::vector<std::string>().swap(models);
 
-       // some plug-in (opencv) doesn't allocate memory for input while loading models
-       // But, others (tflite) allcate memory while loading.
-       // Thus, the SetInputData() will be implemented in plug-in such as OpenCV, but
-       // just leave empty in plug-in such as tflite.
-       ret = mBackend->SetInputDataBuffer(inputData);
+       // Get tensor buffers from a backend engine if the backend engine allocated input tensor buffers.
+       ret = mBackend->GetInputTensorBuffers(mInputTensorBuffers);
        if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-               LOGE("Fail to SetInputData");
-               return ConvertEngineErrorToVisionError(ret);;
+               LOGE("Fail to get input tensor buffers from backend engine.");
+               return ConvertEngineErrorToVisionError(ret);
        }
 
-       void *dataPtr = mBackend->GetInputDataPtr();
-       if (dataPtr == nullptr) {
-               LOGE("input data address is null");
-               std::vector<std::string>().swap(models);
-               return MEDIA_VISION_ERROR_INTERNAL;
+       ret = mBackend->GetInputLayerProperty(mInputLayerProperty);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               LOGE("Fail to get input layer property from backend engine.");
+               return ConvertEngineErrorToVisionError(ret);
        }
 
-       //get type and allocate memory to mInputBuffer;
-       InputAttrType attrType = static_cast<InputAttrType>(mBackend->GetInputLayerAttrType());
-       if (attrType == InputAttrUInt8) {
-               LOGI("InputType is %d ch with UINT8", mCh);
-               if (mCh == 1) {
-                       mMatType = CV_8UC1;
-               } else if (mCh == 3) {
-                       mMatType = CV_8UC3;
-               } else {
-                       LOGE("Not supported");
-                       std::vector<std::string>().swap(models);
-                       return ConvertEngineErrorToVisionError(ret);;
+       // If the backend engine isn't able to allocate input tensor buffers internally,
+       // then allocate the buffers here.
+       if (mInputTensorBuffers.empty()) {
+               for (int i = 0; i < mInputLayerProperty.tensor_infos.size(); ++i) {
+                       inference_engine_tensor_info tensor_info = mInputLayerProperty.tensor_infos[i];
+                       inference_engine_tensor_buffer tensor_buffer;
+                       if (tensor_info.data_type == TENSOR_DATA_TYPE_FLOAT32) {
+                               tensor_buffer.buffer = new float[tensor_info.size];
+                       } else if (tensor_info.data_type == TENSOR_DATA_TYPE_UINT8) {
+                               tensor_buffer.buffer = new char[tensor_info.size];
+                       } else {
+                               LOGE("Not supported input tensor data type.");
+                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+                       }
+
+                       mInputTensorBuffers.push_back(tensor_buffer);
                }
        }
-       else if (attrType == InputAttrFloat32) {
-               LOGI("InputType is %d ch with FLOAT32", mCh);
-               if (mCh == 1) {
-                       mMatType = CV_32FC1;
-               } else if (mCh == 3) {
-                       mMatType = CV_32FC3;
-               } else {
-                       LOGE("Not supported");
-                       std::vector<std::string>().swap(models);
-                       return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
-               }
-       }
-       else {
-               LOGE("Not supported");
-               std::vector<std::string>().swap(models);
-               return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
-       }
 
-       mInputBuffer = cv::Mat(mInputSize.height, mInputSize.width, mMatType, dataPtr);
+       LOGI("Input tensor buffer count is %d", mInputTensorBuffers.size());
 
        mCanRun = true;
 
-       std::vector<std::string>().swap(models);
-
        LOGI("LEAVE");
 
        return ConvertEngineErrorToVisionError(ret);
 }
 
-int Inference::Run(mv_source_h mvSource, mv_rectangle_s *roi)
+int Inference::Run(std::vector<mv_source_h> &mvSources, std::vector<mv_rectangle_s> &rects)
 {
        int ret = INFERENCE_ENGINE_ERROR_NONE;
 
@@ -568,6 +548,21 @@ int Inference::Run(mv_source_h mvSource, mv_rectangle_s *roi)
        unsigned int bufferSize = 0;
        unsigned char *buffer = NULL;
 
+       if (mvSources.empty()) {
+               LOGE("mvSources should contain at least one mv source.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       // Only one input source per inference request is supported as of now.
+       if (mvSources.size() > 1) {
+               LOGE("Only one mv source is allowed for the inference.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       // TODO. Consider multiple sources.
+       mv_source_h mvSource = mvSources.front();
+       mv_rectangle_s *roi = rects.empty() ? NULL : &(rects.front());
+
        mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID;
 
        if (mv_source_get_width(mvSource, &width) != MEDIA_VISION_ERROR_NONE ||
@@ -576,6 +571,8 @@ int Inference::Run(mv_source_h mvSource, mv_rectangle_s *roi)
                        mv_source_get_buffer(mvSource, &buffer, &bufferSize))
                return MEDIA_VISION_ERROR_INTERNAL;
 
+       // TODO. Let's support various color spaces.
+
        if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) {
                LOGE("Not Supported format!\n");
                return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
@@ -593,14 +590,36 @@ int Inference::Run(mv_source_h mvSource, mv_rectangle_s *roi)
 
        LOGE("Size: w:%d, h:%d", cvSource.size().width, cvSource.size().height);
 
-       // Convert color space of input tensor data and then normalize it.
-       ret = DoPreprocess(cvSource);
-       if (ret != MEDIA_VISION_ERROR_NONE) {
-               LOGE("Fail to preprocess input tensor data.");
-               return ret;
+       if (mCh != 1 && mCh != 3) {
+               LOGE("Channel not supported.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       // TODO. Consider multiple tensors.
+       inference_engine_tensor_info tensor_info = mInputLayerProperty.tensor_infos.front();
+       if (tensor_info.data_type == TENSOR_DATA_TYPE_UINT8) {
+               LOGI("InputType is %d ch with UINT8", mCh);
+               mMatType = mCh == 1 ? CV_8UC1 : CV_8UC3;
+       }
+
+       if (tensor_info.data_type == TENSOR_DATA_TYPE_FLOAT32) {
+               LOGI("InputType is %d ch with FLOAT32", mCh);
+               mMatType = mCh == 1 ? CV_32FC1 : CV_32FC3;
+       }
+
+       std::vector<inference_engine_tensor_buffer>::iterator iter;
+       for (iter = mInputTensorBuffers.begin(); iter != mInputTensorBuffers.end(); iter++) {
+               inference_engine_tensor_buffer tensor_buffer = *iter;
+
+               // Convert color space of input tensor data and then normalize it.
+               ret = Preprocess(cvSource, cv::Mat(mInputSize.height, mInputSize.width, mMatType, tensor_buffer.buffer));
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+                       LOGE("Fail to preprocess input tensor data.");
+                       return ret;
+               }
        }
 
-       ret = mBackend->Run();
+       ret = mBackend->Run(mInputTensorBuffers);
 
        return ConvertEngineErrorToVisionError(ret);
 }
index ab00c49cf2fc7e9d595d31ad48d891e495854ccd..1ecfb27bd38a3901181a44051b38c4c719f86a9b 100755 (executable)
@@ -424,8 +424,15 @@ int mv_inference_image_classify_open(
 
        int ret = MEDIA_VISION_ERROR_NONE;
        int numberOfOutputs = 0;
+       std::vector<mv_source_h> sources;
+       std::vector<mv_rectangle_s> rects;
 
-       ret = pInfer->Run(source, roi);
+       sources.push_back(source);
+
+       if (roi != NULL)
+               rects.push_back(*roi);
+
+       ret = pInfer->Run(sources, rects);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to run inference");
                return ret;
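
The object detection, face detection and facial landmark paths below repeat the same
wrapping pattern; a hypothetical helper (not part of this patch) would capture it as:

    // Hypothetical helper, not part of this patch: adapts the legacy
    // (source, roi) pair to the vector-based Inference::Run() interface.
    static int RunSingle(Inference *pInfer, mv_source_h source, mv_rectangle_s *roi)
    {
            std::vector<mv_source_h> sources;
            std::vector<mv_rectangle_s> rects;

            sources.push_back(source);
            if (roi != NULL)
                    rects.push_back(*roi);

            return pInfer->Run(sources, rects);
    }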
@@ -473,8 +480,12 @@ int mv_inference_object_detect_open(
 
        int ret = MEDIA_VISION_ERROR_NONE;
        int numberOfOutputs = 0;
+       std::vector<mv_source_h> sources;
+       std::vector<mv_rectangle_s> rects;
+
+       sources.push_back(source);
 
-       ret = pInfer->Run(source, NULL);
+       ret = pInfer->Run(sources, rects);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to run inference");
                return ret;
@@ -525,8 +536,12 @@ int mv_inference_face_detect_open(
 
        int ret = MEDIA_VISION_ERROR_NONE;
        int numberOfOutputs = 0;
+       std::vector<mv_source_h> sources;
+       std::vector<mv_rectangle_s> rects;
 
-       ret = pInfer->Run(source, NULL);
+       sources.push_back(source);
+
+       ret = pInfer->Run(sources, rects);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to run inference");
                return ret;
@@ -567,8 +582,15 @@ int mv_inference_facial_landmark_detect_open(
 
        int ret = MEDIA_VISION_ERROR_NONE;
        int numberOfLandmarks = 0;
+       std::vector<mv_source_h> sources;
+       std::vector<mv_rectangle_s> rects;
+
+       sources.push_back(source);
+
+       if (roi != NULL)
+               rects.push_back(*roi);
 
-       ret = pInfer->Run(source, roi);
+       ret = pInfer->Run(sources, rects);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to run inference");
                return ret;