WIP: Performance improvement sandbox/sangjung/improve_perf
author Sangjung Woo <sangjung.woo@samsung.com>
Mon, 26 Oct 2020 02:24:57 +0000 (11:24 +0900)
committer Sangjung Woo <sangjung.woo@samsung.com>
Mon, 26 Oct 2020 02:24:57 +0000 (11:24 +0900)
* Use the ml_single_invoke_no_alloc() ML API instead of
  ml_single_invoke(); see the sketch below.
* Remove unnecessary memory copies of the output tensors.
* Cache the input/output tensor info handles via the new
  UpdateTensorsInfo() and reuse the tensor data handles across inferences.
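
For reference, a minimal sketch (not part of the patch) of the two invocation
paths. It assumes the ml_single_invoke_no_alloc() prototype declared in
inference_engine_mlapi.cpp, which is an internal, non-public ML API; the
helper names and the listed headers below are hypothetical.

    // A minimal sketch, assuming the ML Single API headers
    // (e.g. <nnstreamer-single.h>) and <string.h>.

    // Old path: ml_single_invoke() allocates a new output data handle on
    // every call, so the result must be copied back into the caller's
    // buffer and the handle destroyed afterwards.
    int run_with_alloc(ml_single_h single, ml_tensors_data_h in,
                       void *dst /* at least src_size bytes */)
    {
        ml_tensors_data_h out = NULL;
        void *src = NULL;
        size_t src_size = 0;

        int err = ml_single_invoke(single, in, &out);
        if (err != ML_ERROR_NONE)
            return err;

        err = ml_tensors_data_get_tensor_data(out, 0, &src, &src_size);
        if (err == ML_ERROR_NONE)
            memcpy(dst, src, src_size);   /* per-inference copy */

        ml_tensors_data_destroy(out);     /* per-inference alloc/free */
        return err;
    }

    // New path: the output data handle is created once from the cached
    // tensor info (see GetOutputTensorBuffers) and filled in place, so no
    // per-inference allocation or copy is needed.
    int run_no_alloc(ml_single_h single, ml_tensors_data_h in,
                     ml_tensors_data_h out)
    {
        return ml_single_invoke_no_alloc(single, in, out);
    }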

Signed-off-by: Sangjung Woo <sangjung.woo@samsung.com>
src/inference_engine_mlapi.cpp
src/inference_engine_mlapi_private.h

index 2e3c0a2..706ec29 100644 (file)
 #include <time.h>
 #include <queue>
 
+// TODO. Below is test code. DO NOT use the ML internal function.
+#define ENABLE_NO_ALLOC
+#if defined(ENABLE_NO_ALLOC)
+extern "C" int ml_single_invoke_no_alloc(ml_single_h single, const ml_tensors_data_h input, ml_tensors_data_h output);
+#endif
+
 namespace InferenceEngineImpl
 {
 namespace MLAPIImpl
@@ -31,6 +37,8 @@ namespace MLAPIImpl
                        mPluginType(),
                        mTargetDevice(),
                        mSingle(),
+                       mInputInfoHandle(),
+                       mOutputInfoHandle(),
                        mInputDataHandle(),
                        mOutputDataHandle(),
                        mDesignated_inputs(),
@@ -53,12 +61,20 @@ namespace MLAPIImpl
 
                ml_single_close(mSingle);
 
+               if (mInputInfoHandle)
+                       ml_tensors_info_destroy(mInputInfoHandle);
+
+               if (mOutputInfoHandle)
+                       ml_tensors_info_destroy(mOutputInfoHandle);
+
                if (mInputDataHandle)
                        ml_tensors_data_destroy(mInputDataHandle);
 
                if (mOutputDataHandle)
                        ml_tensors_data_destroy(mOutputDataHandle);
 
+               mInputInfoHandle = NULL;
+               mOutputInfoHandle = NULL;
                mInputDataHandle = NULL;
                mOutputDataHandle = NULL;
        }
@@ -192,16 +208,22 @@ namespace MLAPIImpl
                // TODO. create ml_tensor_info for input and output tensor and pass
                //               them as parameters of ml_single_open function.
 
-               int ret = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
+               int err = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
                                                                 nnfw_type, nnfw_hw);
-               if (ret != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_open(%d).", ret);
+               if (err != ML_ERROR_NONE) {
+                       LOGE("Failed to request ml_single_open(%d).", err);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                }
 
+               err = UpdateTensorsInfo();
+               if (err != INFERENCE_ENGINE_ERROR_NONE) {
+                       ml_single_close(mSingle);
+                       mSingle = NULL;
+               }
+
                LOGI("LEAVE");
 
-               return INFERENCE_ENGINE_ERROR_NONE;
+               return err;
        }
 
        int InferenceMLAPI::GetInputTensorBuffers(
@@ -209,25 +231,18 @@ namespace MLAPIImpl
        {
                LOGI("ENTER");
 
-               buffers.clear();
-
                // TODO. Implement this function according to a given ML Single API backend properly.
 
-               ml_tensors_info_h in_info = NULL;
-
-               int ret = ml_single_get_input_info(mSingle, &in_info);
-               if (ret != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_get_input_info(%d).", ret);
-                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-               }
-
                // ML Single API will always provide internal tensor buffers so
                // get the tensor buffers back to Mediavision framework so that
                // Mediavision framework doesn't allocate the tensor buffers internally.
 
+               buffers.clear();
+
+               int ret;
                unsigned int cnt;
 
-               ret = ml_tensors_info_get_count(in_info, &cnt);
+               ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
                if (ret != ML_ERROR_NONE) {
                        LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -235,15 +250,19 @@ namespace MLAPIImpl
 
                LOGI("input tensor count = %u", cnt);
 
-               for (unsigned int i = 0; i < cnt; ++i) {
-                       inference_engine_tensor_buffer in_buffer;
-                       ml_tensor_type_e in_type;
-
-                       ret = ml_tensors_data_create(in_info, &mInputDataHandle);
+               // TODO. Below is test code. Should we allocate a new buffer for every inference?
+               if (mInputDataHandle == NULL) {
+                       ret = ml_tensors_data_create(mInputInfoHandle, &mInputDataHandle);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_data_create(%d).", ret);
                                return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                        }
+               }
+
+               // TODO. Cache tensor info and reduce function calls in UpdateTensorsInfo().
+               for (unsigned int i = 0; i < cnt; ++i) {
+                       inference_engine_tensor_buffer in_buffer;
+                       ml_tensor_type_e in_type;
 
                        ret = ml_tensors_data_get_tensor_data(mInputDataHandle, i, &in_buffer.buffer, &in_buffer.size);
                        if (ret != ML_ERROR_NONE) {
@@ -253,10 +272,9 @@ namespace MLAPIImpl
 
                        LOGE("buffer = %p, size = %d\n", in_buffer.buffer, in_buffer.size);
 
-                       int ret = ml_tensors_info_get_tensor_type(in_info, i, &in_type);
+                       ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, i, &in_type);
                        if (ret != ML_ERROR_NONE) {
-                               LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
-                                        ret);
+                               LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
                                return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                        }
 
@@ -283,25 +301,18 @@ namespace MLAPIImpl
        {
                LOGI("ENTER");
 
-               buffers.clear();
-
                // TODO. Need to check if model file loading is done.
 
-               ml_tensors_info_h out_info = NULL;
-
-               int ret = ml_single_get_output_info(mSingle, &out_info);
-               if (ret != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_get_output_info(%d).", ret);
-                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-               }
-
                // ML Single API will always provide internal tensor buffers so
                // get the tensor buffers back to Mediavision framework so that
                // Mediavision framework doesn't allocate the tensor buffers internally.
 
+               buffers.clear();
+
+               int ret;
                unsigned int cnt;
 
-               ret = ml_tensors_info_get_count(out_info, &cnt);
+               ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
                if (ret != ML_ERROR_NONE) {
                        LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -309,15 +320,19 @@ namespace MLAPIImpl
 
                LOGI("output tensor count = %u", cnt);
 
-               for (unsigned int i = 0; i < cnt; ++i) {
-                       inference_engine_tensor_buffer out_buffer;
-                       ml_tensor_type_e out_type;
-
-                       ret = ml_tensors_data_create(out_info, &mOutputDataHandle);
+               // TODO. Below is test code. Should we allocate a new buffer for every inference?
+               if (mOutputDataHandle == NULL) {
+                       ret = ml_tensors_data_create(mOutputInfoHandle, &mOutputDataHandle);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_data_create(%d).", ret);
                                return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                        }
+               }
+
+               // TODO. Cache tensor info and reduce function calls in UpdateTensorsInfo().
+               for (unsigned int i = 0; i < cnt; ++i) {
+                       inference_engine_tensor_buffer out_buffer;
+                       ml_tensor_type_e out_type;
 
                        ret = ml_tensors_data_get_tensor_data(mOutputDataHandle, i, &out_buffer.buffer, &out_buffer.size);
                        if (ret != ML_ERROR_NONE) {
@@ -327,10 +342,9 @@ namespace MLAPIImpl
 
                        LOGE("buffer = %p, size = %d\n", out_buffer.buffer, out_buffer.size);
 
-                       ret = ml_tensors_info_get_tensor_type(out_info, i, &out_type);
+                       ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, i, &out_type);
                        if (ret != ML_ERROR_NONE) {
-                               LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
-                                        ret);
+                               LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
                                return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                        }
 
@@ -357,18 +371,11 @@ namespace MLAPIImpl
        {
                LOGI("ENTER");
 
-               ml_tensors_info_h in_info = NULL;
-
                // TODO. Need to check if model file loading is done.
-
-               int ret = ml_single_get_input_info(mSingle, &in_info);
-               if (ret != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_get_input_info(%d).", ret);
-                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-               }
-
+               int ret;
                unsigned int cnt;
-               ret = ml_tensors_info_get_count(in_info, &cnt);
+
+               ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
                if (ret != ML_ERROR_NONE) {
                        LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -379,11 +386,11 @@ namespace MLAPIImpl
                for (unsigned int i = 0; i < cnt; ++i) {
                        inference_engine_tensor_info tensor_info;
                        ml_tensor_type_e in_type;
-                       unsigned int in_dim[ML_TENSOR_RANK_LIMIT];
+                       ml_tensor_dimension in_dim;
                        char *in_name = NULL;
                        size_t in_size = 1;
 
-                       ret = ml_tensors_info_get_tensor_type(in_info, i, &in_type);
+                       ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, i, &in_type);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
                                         ret);
@@ -397,7 +404,7 @@ namespace MLAPIImpl
                                return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
                        }
 
-                       ret = ml_tensors_info_get_tensor_dimension(in_info, i, in_dim);
+                       ret = ml_tensors_info_get_tensor_dimension(mInputInfoHandle, i, in_dim);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
                                         ret);
@@ -413,7 +420,7 @@ namespace MLAPIImpl
 
                        LOGI("input tensor size = %zu", in_size);
 
-                       ret = ml_tensors_info_get_tensor_name(in_info, i, &in_name);
+                       ret = ml_tensors_info_get_tensor_name(mInputInfoHandle, i, &in_name);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
                                         ret);
@@ -442,18 +449,11 @@ namespace MLAPIImpl
        {
                LOGI("ENTER");
 
-               ml_tensors_info_h out_info = NULL;
-
                // TODO. Need to check if model file loading is done.
-
-               int ret = ml_single_get_output_info(mSingle, &out_info);
-               if (ret != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_get_output_info(%d).", ret);
-                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-               }
-
+               int ret;
                unsigned int cnt;
-               ret = ml_tensors_info_get_count(out_info, &cnt);
+
+               ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
                if (ret != ML_ERROR_NONE) {
                        LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -468,7 +468,7 @@ namespace MLAPIImpl
                        char *out_name = NULL;
                        size_t out_size = 1;
 
-                       ret = ml_tensors_info_get_tensor_type(out_info, i, &out_type);
+                       ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, i, &out_type);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
                                         ret);
@@ -482,7 +482,7 @@ namespace MLAPIImpl
                                return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
                        }
 
-                       ret = ml_tensors_info_get_tensor_dimension(out_info, i, out_dim);
+                       ret = ml_tensors_info_get_tensor_dimension(mOutputInfoHandle, i, out_dim);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
                                         ret);
@@ -513,7 +513,7 @@ namespace MLAPIImpl
 
                        LOGI("output tensor size = %zu", out_size);
 
-                       ret = ml_tensors_info_get_tensor_name(out_info, i, &out_name);
+                       ret = ml_tensors_info_get_tensor_name(mOutputInfoHandle, i, &out_name);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
                                         ret);
@@ -554,7 +554,7 @@ namespace MLAPIImpl
 
                // TODO. Request input property information to a given ML Single API of nnstreamer backend,
                // and set it instead of user-given one,
-
+               // Call UpdateTensorsInfo() after requesting input info.
                mDesignated_inputs = property.layer_names;
                mInputProperty = property;
 
@@ -580,7 +580,7 @@ namespace MLAPIImpl
 
                // TODO. Request output property information to a given ML Single API of nnstreamer backend,
                // and set it instead of user-given one,
-
+               // Call UpdateTensorsInfo() after requesting output info.
                mDesignated_outputs = property.layer_names;
                mOutputProperty = property;
 
@@ -647,6 +647,41 @@ namespace MLAPIImpl
                return -1;
        }
 
+       int InferenceMLAPI::UpdateTensorsInfo()
+       {
+               LOGI("ENTER");
+
+               if (!mSingle) {
+                       LOGE("Invalid state, single-shot handle is not initialized.");
+                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+               }
+
+               if (mInputInfoHandle) {
+                       ml_tensors_info_destroy(mInputInfoHandle);
+                       mInputInfoHandle = NULL;
+               }
+
+               if (mOutputInfoHandle) {
+                       ml_tensors_info_destroy(mOutputInfoHandle);
+                       mOutputInfoHandle = NULL;
+               }
+
+               int ret = ml_single_get_input_info(mSingle, &mInputInfoHandle);
+               if (ret != ML_ERROR_NONE) {
+                       LOGE("Failed to request ml_single_get_input_info(%d).", ret);
+                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+               }
+
+               ret = ml_single_get_output_info(mSingle, &mOutputInfoHandle);
+               if (ret != ML_ERROR_NONE) {
+                       LOGE("Failed to request ml_single_get_output_info(%d).", ret);
+                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+               }
+
+               LOGI("LEAVE");
+               return INFERENCE_ENGINE_ERROR_NONE;
+       }
+
        int InferenceMLAPI::Run(
                        std::vector<inference_engine_tensor_buffer> &input_buffers,
                        std::vector<inference_engine_tensor_buffer> &output_buffers)
@@ -659,46 +694,46 @@ namespace MLAPIImpl
                        return err;
                }
 
-               err = ml_single_invoke(mSingle, mInputDataHandle, &mOutputDataHandle);
+#if defined(ENABLE_NO_ALLOC)
+               err = ml_single_invoke_no_alloc(mSingle, mInputDataHandle, mOutputDataHandle);
                if (err != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_invoke(%d).", err);
+                       LOGE("Failed to request ml_single_invoke_no_alloc(%d).", err);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                }
+#else
+               ml_tensors_data_h out_data = NULL;
+               void *data_ptr;
+               size_t data_size;
+               unsigned int out_cnt;
 
-               ml_tensors_info_h out_info = NULL;
-
-               err = ml_single_get_output_info(mSingle, &out_info);
+               err = ml_tensors_info_get_count(mOutputInfoHandle, &out_cnt);
                if (err != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_get_output_info(%d).", err);
+                       LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                }
 
-               unsigned int out_cnt;
-
-               err = ml_tensors_info_get_count(out_info, &out_cnt);
+               // Be careful: ml_single_invoke() returns a newly allocated output handle.
+               err = ml_single_invoke(mSingle, mInputDataHandle, &out_data);
                if (err != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
+                       LOGE("Failed to request ml_single_invoke(%d).", err);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                }
 
-               // TODO. Why below code is required?
-               // ML Single API provides internal tensor buffer for output tensor
-               // and user alreadys know the buffer by GetOutputTensorBuffers.
-               //
-               // However, without below code, user cannot get the output result
-               // correctly. What happens in ML Single API framework?
                for (unsigned int i = 0; i < out_cnt; ++i) {
-                       err = ml_tensors_data_get_tensor_data(
-                               mOutputDataHandle, i, (void **) &output_buffers[i].buffer,
-                               &output_buffers[i].size);
+                       err = ml_tensors_data_get_tensor_data(out_data, i, &data_ptr, &data_size);
                        if (err != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", err);
+                               ml_tensors_data_destroy(out_data);
                                return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                        }
 
+                       // TODO. Remove memcpy() using ml_single_invoke_fill() later.
+                       memcpy(output_buffers[i].buffer, data_ptr, output_buffers[i].size);
                        LOGI("Output tensor[%u] = %zu", i, output_buffers[i].size);
                }
 
+               ml_tensors_data_destroy(out_data);
+#endif
                LOGI("LEAVE");
 
                return INFERENCE_ENGINE_ERROR_NONE;
index b6b4b1e..b34cfc8 100644 (file)
@@ -77,10 +77,13 @@ namespace MLAPIImpl
                                std::vector<inference_engine_tensor_buffer> &input_buffers,
                                std::vector<inference_engine_tensor_buffer> &output_buffers);
                int ConvertTensorType(int tensor_type);
+               int UpdateTensorsInfo();
 
                int mPluginType;
                int mTargetDevice;
                ml_single_h mSingle;
+               ml_tensors_info_h mInputInfoHandle;
+               ml_tensors_info_h mOutputInfoHandle;
                ml_tensors_data_h mInputDataHandle;
                ml_tensors_data_h mOutputDataHandle;
                std::vector<std::string> mDesignated_inputs;