mv_inference: Pass input tensor buffers and consider multiple tensors
author    Inki Dae <inki.dae@samsung.com>
Thu, 13 Feb 2020 07:37:30 +0000 (16:37 +0900)
committer Inki Dae <inki.dae@samsung.com>
Tue, 14 Apr 2020 00:40:31 +0000 (09:40 +0900)
This patch passes the input tensor buffers allocated by the Inference layer
to the backend engine for inference, and prepares the code for multiple
input tensors. To do so, it switches to the new backend callbacks and drops
the old ones.

Change-Id: I4156c88bd5f18b0ba36b186ca08e3d4c04ceada3
Signed-off-by: Inki Dae <inki.dae@samsung.com>
mv_inference/inference/include/Inference.h
mv_inference/inference/src/Inference.cpp
mv_inference/inference/src/mv_inference_open.cpp
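
For reference, a minimal sketch of the new calling convention from the open layer
(mirroring the mv_inference_open.cpp hunks below; source, roi and pInfer are the
local names used there):

    std::vector<mv_source_h> sources;
    std::vector<mv_rectangle_s> rects;

    sources.push_back(source);         // only a single source is accepted for now
    if (roi != NULL)
            rects.push_back(*roi);     // an empty rects vector means the full frame

    ret = pInfer->Run(sources, rects);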

index 177a5d9ec2ca86fccea2a603e043bd686ec01f7a..f20af31638e016a5e688093d2f04a5f660137062 100755 (executable)
@@ -232,16 +232,16 @@ public:
        int Load();
 
        /**
-        * @brief       Runs inference with the roi of a given image
+        * @brief       Runs inference with a region of a given image
         * @details Use this function to run forward pass with the given image.
-        *          The given image is preprocessed and the roi of the image is
+        *          The given image is preprocessed and the region of the image is
         *          thrown to neural network. Then, the output tensor is returned.
         *          If roi is NULL, then full source will be analyzed.
         *
         * @since_tizen 5.5
         * @return @c true on success, otherwise a negative error value
         */
-       int Run(mv_source_h mvSource, mv_rectangle_s *roi = NULL);
+       int Run(std::vector<mv_source_h> &mvSources, std::vector<mv_rectangle_s> &rects);
 
        /**
         * @brief       Gets that given engine is supported or not
@@ -312,11 +312,14 @@ private:
        std::map<std::string, int> mModelFormats;
     std::vector<std::string> mUserListName;
 
+       std::vector<inference_engine_tensor_buffer> mInputTensorBuffers;
+       inference_engine_layer_property mInputLayerProperty;
+
 private:
        void CheckSupportedInferenceBackend();
        int ConvertEngineErrorToVisionError(int error);
        int ConvertTargetTypes(int given_types);
-       int DoPreprocess(cv::Mat cvImg);
+       int Preprocess(cv::Mat cvImg, cv::Mat cvDst);
        int SetUserFile(std::string filename);
 };
 
index b3273c7ad7e5a291107cee1348e054b2a505ba35..5f3c5d1d00aa39d1a3640b6bfb1443c8f68ff3f8 100755 (executable)
@@ -78,6 +78,9 @@ Inference::Inference() :
 {
        LOGI("ENTER");
 
+       mInputTensorBuffers.clear();
+       mInputLayerProperty.tensor_infos.clear();
+
        mBackend = NULL;
        mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_OPENCV, std::make_pair("opencv", false)));
        mSupportedInferenceBackend.insert(std::make_pair(MV_INFERENCE_BACKEND_TFLITE, std::make_pair("tflite", false)));
@@ -103,6 +106,8 @@ Inference::Inference() :
 
 Inference::~Inference()
 {
+       std::vector<inference_engine_tensor_buffer>().swap(mInputTensorBuffers);
+       std::vector<inference_engine_tensor_info>().swap(mInputLayerProperty.tensor_infos);
        mModelFormats.clear();
 
        // Release backend engine.
@@ -199,7 +204,7 @@ int Inference::ConvertTargetTypes(int given_types)
        return target_types;
 }
 
-int Inference::DoPreprocess(cv::Mat cvImg)
+int Inference::Preprocess(cv::Mat cvImg, cv::Mat cvDst)
 {
     mSourceSize = cvImg.size();
     int width = mInputSize.width;
@@ -237,7 +242,7 @@ int Inference::DoPreprocess(cv::Mat cvImg)
 
     sampleNormalized /= (float)mDeviation;
 
-    sampleNormalized.convertTo(mInputBuffer, mMatType);
+    sampleNormalized.convertTo(cvDst, mMatType);
 
     return MEDIA_VISION_ERROR_NONE;
 }
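
Note that the destination Mat is now supplied by the caller, so the normalized data
lands directly in the backend's tensor buffer. A minimal sketch of the idea (buffer
size and type are illustrative, not taken from the patch):

    #include <opencv2/core.hpp>
    #include <vector>

    // Wrap a preallocated tensor buffer with cv::Mat so convertTo() writes into
    // it without an extra copy. convertTo() only reuses the destination when its
    // size and type already match; otherwise it reallocates and silently detaches
    // from the external buffer, so the wrapper must be created with the final
    // mMatType and the network input size.
    std::vector<float> buf(224 * 224 * 3);        // hypothetical FLOAT32 input tensor
    cv::Mat dst(224, 224, CV_32FC3, buf.data());  // wraps the buffer, no ownership
    cv::Mat normalized(224, 224, CV_32FC3, cv::Scalar(0.5f, 0.5f, 0.5f));
    normalized.convertTo(dst, CV_32FC3);          // fills buf in place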
@@ -390,16 +395,12 @@ int Inference::Prepare(void)
        mMean = mConfig.mMeanValue;
        LOGI("mean %.4f, deviation %.4f", mMean, mDeviation);
 
-       mBackend->SetInputTensorParamNode(mConfig.mInputNodeName);
-
        mOutputNumbers = mConfig.mMaxOutputNumbers;
        LOGI("outputNumber %d", mOutputNumbers);
 
        mThreshold = mConfig.mConfidenceThresHold;
        LOGI("threshold %.4f", mThreshold);
 
-       mBackend->SetOutputTensorParamNodes(mConfig.mOutputNodeNames);
-
        // Check if backend supports a given target device/devices or not.
        if (mConfig.mTargetTypes & MV_INFERENCE_TARGET_CPU) {
                if (!(mBackendCapacity.supported_accel_devices & INFERENCE_TARGET_CPU)) {
@@ -488,71 +489,50 @@ int Inference::Load(void)
                return ConvertEngineErrorToVisionError(ret);
        }
 
-       tensor_t inputData;
-       std::vector<int> info{1, mCh, mInputSize.height, mInputSize.width};
-       inputData.dimInfo.push_back(info);
+       std::vector<std::string>().swap(models);
 
-       // some plug-in (opencv) doesn't allocate memory for input while loading models
-       // But, others (tflite) allcate memory while loading.
-       // Thus, the SetInputData() will be implemented in plug-in such as OpenCV, but
-       // just leave empty in plug-in such as tflite.
-       ret = mBackend->SetInputDataBuffer(inputData);
+       // Get tensor buffers from a backend engine if the backend engine allocated input tensor buffers.
+       ret = mBackend->GetInputTensorBuffers(mInputTensorBuffers);
        if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-               LOGE("Fail to SetInputData");
-               return ConvertEngineErrorToVisionError(ret);;
+               LOGE("Fail to get input tensor buffers from backend engine.");
+               return ConvertEngineErrorToVisionError(ret);
        }
 
-       void *dataPtr = mBackend->GetInputDataPtr();
-       if (dataPtr == nullptr) {
-               LOGE("input data address is null");
-               std::vector<std::string>().swap(models);
-               return MEDIA_VISION_ERROR_INTERNAL;
+       ret = mBackend->GetInputLayerProperty(mInputLayerProperty);
+       if (ret != INFERENCE_ENGINE_ERROR_NONE) {
+               LOGE("Fail to get input layer property from backend engine.");
+               return ConvertEngineErrorToVisionError(ret);
        }
 
-       //get type and allocate memory to mInputBuffer;
-       InputAttrType attrType = static_cast<InputAttrType>(mBackend->GetInputLayerAttrType());
-       if (attrType == InputAttrUInt8) {
-               LOGI("InputType is %d ch with UINT8", mCh);
-               if (mCh == 1) {
-                       mMatType = CV_8UC1;
-               } else if (mCh == 3) {
-                       mMatType = CV_8UC3;
-               } else {
-                       LOGE("Not supported");
-                       std::vector<std::string>().swap(models);
-                       return ConvertEngineErrorToVisionError(ret);;
+       // If the backend engine isn't able to allocate input tensor buffers internally,
+       // then allocate the buffers here.
+       if (mInputTensorBuffers.empty()) {
+               for (int i = 0; i < mInputLayerProperty.tensor_infos.size(); ++i) {
+                       inference_engine_tensor_info tensor_info = mInputLayerProperty.tensor_infos[i];
+                       inference_engine_tensor_buffer tensor_buffer;
+                       if (tensor_info.data_type == TENSOR_DATA_TYPE_FLOAT32) {
+                               tensor_buffer.buffer = new float[tensor_info.size];
+                       } else if (tensor_info.data_type == TENSOR_DATA_TYPE_UINT8) {
+                               tensor_buffer.buffer = new char[tensor_info.size];
+                       } else {
+                               LOGE("Not supported input tensor data type.");
+                               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+                       }
+
+                       mInputTensorBuffers.push_back(tensor_buffer);
                }
        }
-       else if (attrType == InputAttrFloat32) {
-               LOGI("InputType is %d ch with FLOAT32", mCh);
-               if (mCh == 1) {
-                       mMatType = CV_32FC1;
-               } else if (mCh == 3) {
-                       mMatType = CV_32FC3;
-               } else {
-                       LOGE("Not supported");
-                       std::vector<std::string>().swap(models);
-                       return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
-               }
-       }
-       else {
-               LOGE("Not supported");
-               std::vector<std::string>().swap(models);
-               return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
-       }
 
-       mInputBuffer = cv::Mat(mInputSize.height, mInputSize.width, mMatType, dataPtr);
+       LOGI("Input tensor buffer count is %d", mInputTensorBuffers.size());
 
        mCanRun = true;
 
-       std::vector<std::string>().swap(models);
-
        LOGI("LEAVE");
 
        return ConvertEngineErrorToVisionError(ret);
 }
 
-int Inference::Run(mv_source_h mvSource, mv_rectangle_s *roi)
+int Inference::Run(std::vector<mv_source_h> &mvSources, std::vector<mv_rectangle_s> &rects)
 {
        int ret = INFERENCE_ENGINE_ERROR_NONE;
 
@@ -568,6 +548,21 @@ int Inference::Run(mv_source_h mvSource, mv_rectangle_s *roi)
        unsigned int bufferSize = 0;
        unsigned char *buffer = NULL;
 
+       if (mvSources.empty()) {
+               LOGE("mvSources should contain at least one mv source.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       // Only one input source per inference request is supported as of now.
+       if (mvSources.size() > 1) {
+               LOGE("Only one mv source is allowed for the inference.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       // TODO. Consider multiple sources.
+       mv_source_h mvSource = mvSources.front();
+       mv_rectangle_s *roi = rects.empty() ? NULL : &(rects.front());
+
        mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID;
 
        if (mv_source_get_width(mvSource, &width) != MEDIA_VISION_ERROR_NONE ||
@@ -576,6 +571,8 @@ int Inference::Run(mv_source_h mvSource, mv_rectangle_s *roi)
                        mv_source_get_buffer(mvSource, &buffer, &bufferSize))
                return MEDIA_VISION_ERROR_INTERNAL;
 
+       // TODO. Let's support various color spaces.
+
        if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) {
                LOGE("Not Supported format!\n");
                return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
@@ -593,14 +590,36 @@ int Inference::Run(mv_source_h mvSource, mv_rectangle_s *roi)
 
        LOGE("Size: w:%d, h:%d", cvSource.size().width, cvSource.size().height);
 
-       // Convert color space of input tensor data and then normalize it.
-       ret = DoPreprocess(cvSource);
-       if (ret != MEDIA_VISION_ERROR_NONE) {
-               LOGE("Fail to preprocess input tensor data.");
-               return ret;
+       if (mCh != 1 && mCh != 3) {
+               LOGE("Channel not supported.");
+               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
+       }
+
+       // TODO. Consider multiple tensors.
+       inference_engine_tensor_info tensor_info = mInputLayerProperty.tensor_infos.front();
+       if (tensor_info.data_type == TENSOR_DATA_TYPE_UINT8) {
+               LOGI("InputType is %d ch with UINT8", mCh);
+               mMatType = mCh == 1 ? CV_8UC1 : CV_8UC3;
+       }
+
+       if (tensor_info.data_type == TENSOR_DATA_TYPE_FLOAT32) {
+               LOGI("InputType is %d ch with FLOAT32", mCh);
+               mMatType = mCh == 1 ? CV_32FC1 : CV_32FC3;
+       }
+
+       std::vector<inference_engine_tensor_buffer>::iterator iter;
+       for (iter = mInputTensorBuffers.begin(); iter != mInputTensorBuffers.end(); iter++) {
+               inference_engine_tensor_buffer tensor_buffer = *iter;
+
+               // Convert color space of input tensor data and then normalize it.
+               ret = Preprocess(cvSource, cv::Mat(mInputSize.height, mInputSize.width, mMatType, tensor_buffer.buffer));
+               if (ret != MEDIA_VISION_ERROR_NONE) {
+                       LOGE("Fail to preprocess input tensor data.");
+                       return ret;
+               }
        }
 
-       ret = mBackend->Run();
+       ret = mBackend->Run(mInputTensorBuffers);
 
        return ConvertEngineErrorToVisionError(ret);
 }
index ab00c49cf2fc7e9d595d31ad48d891e495854ccd..1ecfb27bd38a3901181a44051b38c4c719f86a9b 100755 (executable)
@@ -424,8 +424,15 @@ int mv_inference_image_classify_open(
 
        int ret = MEDIA_VISION_ERROR_NONE;
        int numberOfOutputs = 0;
+       std::vector<mv_source_h> sources;
+       std::vector<mv_rectangle_s> rects;
 
-       ret = pInfer->Run(source, roi);
+       sources.push_back(source);
+
+       if (roi != NULL)
+               rects.push_back(*roi);
+
+       ret = pInfer->Run(sources, rects);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to run inference");
                return ret;
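
The object detection, face detection and facial landmark paths below repeat the same
wrapping pattern; a hypothetical helper (not part of this patch) would capture it as:

    // Hypothetical helper, not part of this patch: adapts the legacy
    // (source, roi) pair to the vector-based Inference::Run() interface.
    static int RunSingle(Inference *pInfer, mv_source_h source, mv_rectangle_s *roi)
    {
            std::vector<mv_source_h> sources;
            std::vector<mv_rectangle_s> rects;

            sources.push_back(source);
            if (roi != NULL)
                    rects.push_back(*roi);

            return pInfer->Run(sources, rects);
    }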
@@ -473,8 +480,12 @@ int mv_inference_object_detect_open(
 
        int ret = MEDIA_VISION_ERROR_NONE;
        int numberOfOutputs = 0;
+       std::vector<mv_source_h> sources;
+       std::vector<mv_rectangle_s> rects;
+
+       sources.push_back(source);
 
-       ret = pInfer->Run(source, NULL);
+       ret = pInfer->Run(sources, rects);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to run inference");
                return ret;
@@ -525,8 +536,12 @@ int mv_inference_face_detect_open(
 
        int ret = MEDIA_VISION_ERROR_NONE;
        int numberOfOutputs = 0;
+       std::vector<mv_source_h> sources;
+       std::vector<mv_rectangle_s> rects;
 
-       ret = pInfer->Run(source, NULL);
+       sources.push_back(source);
+
+       ret = pInfer->Run(sources, rects);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to run inference");
                return ret;
@@ -567,8 +582,15 @@ int mv_inference_facial_landmark_detect_open(
 
        int ret = MEDIA_VISION_ERROR_NONE;
        int numberOfLandmarks = 0;
+       std::vector<mv_source_h> sources;
+       std::vector<mv_rectangle_s> rects;
+
+       sources.push_back(source);
+
+       if (roi != NULL)
+               rects.push_back(*roi);
 
-       ret = pInfer->Run(source, roi);
+       ret = pInfer->Run(sources, rects);
        if (ret != MEDIA_VISION_ERROR_NONE) {
                LOGE("Fail to run inference");
                return ret;