WIP: Performance improvement sandbox/sangjung/improve_perf
author Sangjung Woo <sangjung.woo@samsung.com>
Mon, 26 Oct 2020 02:24:57 +0000 (11:24 +0900)
committer Sangjung Woo <sangjung.woo@samsung.com>
Mon, 26 Oct 2020 02:24:57 +0000 (11:24 +0900)
* Use the ml_single_invoke_no_alloc() ML API instead of
  ml_single_invoke(); see the sketch below.
* Remove unnecessary memory copies of the output tensors.
* Cache the input/output tensor info handles via the new
  UpdateTensorsInfo() and reuse the tensor data handles across inferences.
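
For reference, a minimal sketch (not part of the patch) of the two invocation
paths. It assumes the ml_single_invoke_no_alloc() prototype declared in
inference_engine_mlapi.cpp, which is an internal, non-public ML API; the
helper names and the listed headers below are hypothetical.

    // A minimal sketch, assuming the ML Single API headers
    // (e.g. <nnstreamer-single.h>) and <string.h>.

    // Old path: ml_single_invoke() allocates a new output data handle on
    // every call, so the result must be copied back into the caller's
    // buffer and the handle destroyed afterwards.
    int run_with_alloc(ml_single_h single, ml_tensors_data_h in,
                       void *dst /* at least src_size bytes */)
    {
        ml_tensors_data_h out = NULL;
        void *src = NULL;
        size_t src_size = 0;

        int err = ml_single_invoke(single, in, &out);
        if (err != ML_ERROR_NONE)
            return err;

        err = ml_tensors_data_get_tensor_data(out, 0, &src, &src_size);
        if (err == ML_ERROR_NONE)
            memcpy(dst, src, src_size);   /* per-inference copy */

        ml_tensors_data_destroy(out);     /* per-inference alloc/free */
        return err;
    }

    // New path: the output data handle is created once from the cached
    // tensor info (see GetOutputTensorBuffers) and filled in place, so no
    // per-inference allocation or copy is needed.
    int run_no_alloc(ml_single_h single, ml_tensors_data_h in,
                     ml_tensors_data_h out)
    {
        return ml_single_invoke_no_alloc(single, in, out);
    }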

Signed-off-by: Sangjung Woo <sangjung.woo@samsung.com>
src/inference_engine_mlapi.cpp
src/inference_engine_mlapi_private.h

index 2e3c0a2..706ec29 100644 (file)
 #include <time.h>
 #include <queue>
 
+// TODO. Below is test code. DO NOT use the ML internal function.
+#define ENABLE_NO_ALLOC
+#if defined(ENABLE_NO_ALLOC)
+extern "C" int ml_single_invoke_no_alloc(ml_single_h single, const ml_tensors_data_h input, ml_tensors_data_h output);
+#endif
+
 namespace InferenceEngineImpl
 {
 namespace MLAPIImpl
@@ -31,6 +37,8 @@ namespace MLAPIImpl
                        mPluginType(),
                        mTargetDevice(),
                        mSingle(),
+                       mInputInfoHandle(),
+                       mOutputInfoHandle(),
                        mInputDataHandle(),
                        mOutputDataHandle(),
                        mDesignated_inputs(),
@@ -53,12 +61,20 @@ namespace MLAPIImpl
 
                ml_single_close(mSingle);
 
+               if (mInputInfoHandle)
+                       ml_tensors_info_destroy(mInputInfoHandle);
+
+               if (mOutputInfoHandle)
+                       ml_tensors_info_destroy(mOutputInfoHandle);
+
                if (mInputDataHandle)
                        ml_tensors_data_destroy(mInputDataHandle);
 
                if (mOutputDataHandle)
                        ml_tensors_data_destroy(mOutputDataHandle);
 
+               mInputInfoHandle = NULL;
+               mOutputInfoHandle = NULL;
                mInputDataHandle = NULL;
                mOutputDataHandle = NULL;
        }
@@ -192,16 +208,22 @@ namespace MLAPIImpl
                // TODO. create ml_tensor_info for input and output tensor and pass
                //               them as parameters of ml_single_open function.
 
-               int ret = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
+               int err = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
                                                                 nnfw_type, nnfw_hw);
-               if (ret != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_open(%d).", ret);
+               if (err != ML_ERROR_NONE) {
+                       LOGE("Failed to request ml_single_open(%d).", err);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                }
 
+               err = UpdateTensorsInfo();
+               if (err != INFERENCE_ENGINE_ERROR_NONE) {
+                       ml_single_close(mSingle);
+                       mSingle = NULL;
+               }
+
                LOGI("LEAVE");
 
-               return INFERENCE_ENGINE_ERROR_NONE;
+               return err;
        }
 
        int InferenceMLAPI::GetInputTensorBuffers(
@@ -209,25 +231,18 @@ namespace MLAPIImpl
        {
                LOGI("ENTER");
 
-               buffers.clear();
-
                // TODO. Implement this function according to a given ML Single API backend properly.
 
-               ml_tensors_info_h in_info = NULL;
-
-               int ret = ml_single_get_input_info(mSingle, &in_info);
-               if (ret != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_get_input_info(%d).", ret);
-                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-               }
-
                // ML Single API will always provide internal tensor buffers so
                // get the tensor buffers back to Mediavision framework so that
                // Mediavision framework doesn't allocate the tensor buffers internally.
 
+               buffers.clear();
+
+               int ret;
                unsigned int cnt;
 
-               ret = ml_tensors_info_get_count(in_info, &cnt);
+               ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
                if (ret != ML_ERROR_NONE) {
                        LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -235,15 +250,19 @@ namespace MLAPIImpl
 
                LOGI("input tensor count = %u", cnt);
 
-               for (unsigned int i = 0; i < cnt; ++i) {
-                       inference_engine_tensor_buffer in_buffer;
-                       ml_tensor_type_e in_type;
-
-                       ret = ml_tensors_data_create(in_info, &mInputDataHandle);
+               // TODO. Below is test code. Should we allocate a new buffer for every inference?
+               if (mInputDataHandle == NULL) {
+                       ret = ml_tensors_data_create(mInputInfoHandle, &mInputDataHandle);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_data_create(%d).", ret);
                                return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                        }
+               }
+
+               // TODO. Cache tensor info and reduce function calls in UpdateTensorsInfo().
+               for (unsigned int i = 0; i < cnt; ++i) {
+                       inference_engine_tensor_buffer in_buffer;
+                       ml_tensor_type_e in_type;
 
                        ret = ml_tensors_data_get_tensor_data(mInputDataHandle, i, &in_buffer.buffer, &in_buffer.size);
                        if (ret != ML_ERROR_NONE) {
@@ -253,10 +272,9 @@ namespace MLAPIImpl
 
                        LOGE("buffer = %p, size = %d\n", in_buffer.buffer, in_buffer.size);
 
-                       int ret = ml_tensors_info_get_tensor_type(in_info, i, &in_type);
+                       ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, i, &in_type);
                        if (ret != ML_ERROR_NONE) {
-                               LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
-                                        ret);
+                               LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
                                return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                        }
 
@@ -283,25 +301,18 @@ namespace MLAPIImpl
        {
                LOGI("ENTER");
 
-               buffers.clear();
-
                // TODO. Need to check if model file loading is done.
 
-               ml_tensors_info_h out_info = NULL;
-
-               int ret = ml_single_get_output_info(mSingle, &out_info);
-               if (ret != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_get_output_info(%d).", ret);
-                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-               }
-
                // ML Single API will always provide internal tensor buffers so
                // get the tensor buffers back to Mediavision framework so that
                // Mediavision framework doesn't allocate the tensor buffers internally.
 
+               buffers.clear();
+
+               int ret;
                unsigned int cnt;
 
-               ret = ml_tensors_info_get_count(out_info, &cnt);
+               ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
                if (ret != ML_ERROR_NONE) {
                        LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -309,15 +320,19 @@ namespace MLAPIImpl
 
                LOGI("output tensor count = %u", cnt);
 
-               for (unsigned int i = 0; i < cnt; ++i) {
-                       inference_engine_tensor_buffer out_buffer;
-                       ml_tensor_type_e out_type;
-
-                       ret = ml_tensors_data_create(out_info, &mOutputDataHandle);
+               // TODO. Below is test code. Should we allocate a new buffer for every inference?
+               if (mOutputDataHandle == NULL) {
+                       ret = ml_tensors_data_create(mOutputInfoHandle, &mOutputDataHandle);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_data_create(%d).", ret);
                                return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                        }
+               }
+
+               // TODO. Cache tensor info and reduce function calls in UpdateTensorsInfo().
+               for (unsigned int i = 0; i < cnt; ++i) {
+                       inference_engine_tensor_buffer out_buffer;
+                       ml_tensor_type_e out_type;
 
                        ret = ml_tensors_data_get_tensor_data(mOutputDataHandle, i, &out_buffer.buffer, &out_buffer.size);
                        if (ret != ML_ERROR_NONE) {
@@ -327,10 +342,9 @@ namespace MLAPIImpl
 
                        LOGE("buffer = %p, size = %d\n", out_buffer.buffer, out_buffer.size);
 
-                       ret = ml_tensors_info_get_tensor_type(out_info, i, &out_type);
+                       ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, i, &out_type);
                        if (ret != ML_ERROR_NONE) {
-                               LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
-                                        ret);
+                               LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
                                return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                        }
 
@@ -357,18 +371,11 @@ namespace MLAPIImpl
        {
                LOGI("ENTER");
 
-               ml_tensors_info_h in_info = NULL;
-
                // TODO. Need to check if model file loading is done.
-
-               int ret = ml_single_get_input_info(mSingle, &in_info);
-               if (ret != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_get_input_info(%d).", ret);
-                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-               }
-
+               int ret;
                unsigned int cnt;
-               ret = ml_tensors_info_get_count(in_info, &cnt);
+
+               ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
                if (ret != ML_ERROR_NONE) {
                        LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -379,11 +386,11 @@ namespace MLAPIImpl
                for (unsigned int i = 0; i < cnt; ++i) {
                        inference_engine_tensor_info tensor_info;
                        ml_tensor_type_e in_type;
-                       unsigned int in_dim[ML_TENSOR_RANK_LIMIT];
+                       ml_tensor_dimension in_dim;
                        char *in_name = NULL;
                        size_t in_size = 1;
 
-                       ret = ml_tensors_info_get_tensor_type(in_info, i, &in_type);
+                       ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, i, &in_type);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
                                         ret);
@@ -397,7 +404,7 @@ namespace MLAPIImpl
                                return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
                        }
 
-                       ret = ml_tensors_info_get_tensor_dimension(in_info, i, in_dim);
+                       ret = ml_tensors_info_get_tensor_dimension(mInputInfoHandle, i, in_dim);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
                                         ret);
@@ -413,7 +420,7 @@ namespace MLAPIImpl
 
                        LOGI("input tensor size = %zu", in_size);
 
-                       ret = ml_tensors_info_get_tensor_name(in_info, i, &in_name);
+                       ret = ml_tensors_info_get_tensor_name(mInputInfoHandle, i, &in_name);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
                                         ret);
@@ -442,18 +449,11 @@ namespace MLAPIImpl
        {
                LOGI("ENTER");
 
-               ml_tensors_info_h out_info = NULL;
-
                // TODO. Need to check if model file loading is done.
-
-               int ret = ml_single_get_output_info(mSingle, &out_info);
-               if (ret != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_get_output_info(%d).", ret);
-                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-               }
-
+               int ret;
                unsigned int cnt;
-               ret = ml_tensors_info_get_count(out_info, &cnt);
+
+               ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
                if (ret != ML_ERROR_NONE) {
                        LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -468,7 +468,7 @@ namespace MLAPIImpl
                        char *out_name = NULL;
                        size_t out_size = 1;
 
-                       ret = ml_tensors_info_get_tensor_type(out_info, i, &out_type);
+                       ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, i, &out_type);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
                                         ret);
@@ -482,7 +482,7 @@ namespace MLAPIImpl
                                return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
                        }
 
-                       ret = ml_tensors_info_get_tensor_dimension(out_info, i, out_dim);
+                       ret = ml_tensors_info_get_tensor_dimension(mOutputInfoHandle, i, out_dim);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
                                         ret);
@@ -513,7 +513,7 @@ namespace MLAPIImpl
 
                        LOGI("output tensor size = %zu", out_size);
 
-                       ret = ml_tensors_info_get_tensor_name(out_info, i, &out_name);
+                       ret = ml_tensors_info_get_tensor_name(mOutputInfoHandle, i, &out_name);
                        if (ret != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
                                         ret);
@@ -554,7 +554,7 @@ namespace MLAPIImpl
 
                // TODO. Request input property information to a given ML Single API of nnstreamer backend,
                // and set it instead of user-given one,
-
+               // Call UpdateTensorsInfo() after requesting input info.
                mDesignated_inputs = property.layer_names;
                mInputProperty = property;
 
@@ -580,7 +580,7 @@ namespace MLAPIImpl
 
                // TODO. Request output property information to a given ML Single API of nnstreamer backend,
                // and set it instead of user-given one,
-
+               // Call UpdateTensorsInfo() after requesting output info.
                mDesignated_outputs = property.layer_names;
                mOutputProperty = property;
 
@@ -647,6 +647,41 @@ namespace MLAPIImpl
                return -1;
        }
 
+       int InferenceMLAPI::UpdateTensorsInfo()
+       {
+               LOGI("ENTER");
+
+               if (!mSingle) {
+                       LOGE("Invalid state, single-shot handle is not initialized.");
+                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+               }
+
+               if (mInputInfoHandle) {
+                       ml_tensors_info_destroy(mInputInfoHandle);
+                       mInputInfoHandle = NULL;
+               }
+
+               if (mOutputInfoHandle) {
+                       ml_tensors_info_destroy(mOutputInfoHandle);
+                       mOutputInfoHandle = NULL;
+               }
+
+               int ret = ml_single_get_input_info(mSingle, &mInputInfoHandle);
+               if (ret != ML_ERROR_NONE) {
+                       LOGE("Failed to request ml_single_get_input_info(%d).", ret);
+                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+               }
+
+               ret = ml_single_get_output_info(mSingle, &mOutputInfoHandle);
+               if (ret != ML_ERROR_NONE) {
+                       LOGE("Failed to request ml_single_get_output_info(%d).", ret);
+                       return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+               }
+
+               LOGI("LEAVE");
+               return INFERENCE_ENGINE_ERROR_NONE;
+       }
+
        int InferenceMLAPI::Run(
                        std::vector<inference_engine_tensor_buffer> &input_buffers,
                        std::vector<inference_engine_tensor_buffer> &output_buffers)
@@ -659,46 +694,46 @@ namespace MLAPIImpl
                        return err;
                }
 
-               err = ml_single_invoke(mSingle, mInputDataHandle, &mOutputDataHandle);
+#if defined(ENABLE_NO_ALLOC)
+               err = ml_single_invoke_no_alloc(mSingle, mInputDataHandle, mOutputDataHandle);
                if (err != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_invoke(%d).", err);
+                       LOGE("Failed to request ml_single_invoke_no_alloc(%d).", err);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                }
+#else
+               ml_tensors_data_h out_data = NULL;
+               void *data_ptr;
+               size_t data_size;
+               unsigned int out_cnt;
 
-               ml_tensors_info_h out_info = NULL;
-
-               err = ml_single_get_output_info(mSingle, &out_info);
+               err = ml_tensors_info_get_count(mOutputInfoHandle, &out_cnt);
                if (err != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_single_get_output_info(%d).", err);
+                       LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                }
 
-               unsigned int out_cnt;
-
-               err = ml_tensors_info_get_count(out_info, &out_cnt);
+               // Be careful: ml_single_invoke() returns a newly allocated output handle.
+               err = ml_single_invoke(mSingle, mInputDataHandle, &out_data);
                if (err != ML_ERROR_NONE) {
-                       LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
+                       LOGE("Failed to request ml_single_invoke(%d).", err);
                        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                }
 
-               // TODO. Why below code is required?
-               // ML Single API provides internal tensor buffer for output tensor
-               // and user alreadys know the buffer by GetOutputTensorBuffers.
-               //
-               // However, without below code, user cannot get the output result
-               // correctly. What happens in ML Single API framework?
                for (unsigned int i = 0; i < out_cnt; ++i) {
-                       err = ml_tensors_data_get_tensor_data(
-                               mOutputDataHandle, i, (void **) &output_buffers[i].buffer,
-                               &output_buffers[i].size);
+                       err = ml_tensors_data_get_tensor_data(out_data, i, &data_ptr, &data_size);
                        if (err != ML_ERROR_NONE) {
                                LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", err);
+                               ml_tensors_data_destroy(out_data);
                                return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
                        }
 
+                       // TODO. Remove memcpy() using ml_single_invoke_fill() later.
+                       memcpy(output_buffers[i].buffer, data_ptr, output_buffers[i].size);
                        LOGI("Output tensor[%u] = %zu", i, output_buffers[i].size);
                }
 
+               ml_tensors_data_destroy(out_data);
+#endif
                LOGI("LEAVE");
 
                return INFERENCE_ENGINE_ERROR_NONE;
index b6b4b1e..b34cfc8 100644 (file)
@@ -77,10 +77,13 @@ namespace MLAPIImpl
                                std::vector<inference_engine_tensor_buffer> &input_buffers,
                                std::vector<inference_engine_tensor_buffer> &output_buffers);
                int ConvertTensorType(int tensor_type);
+               int UpdateTensorsInfo();
 
                int mPluginType;
                int mTargetDevice;
                ml_single_h mSingle;
+               ml_tensors_info_h mInputInfoHandle;
+               ml_tensors_info_h mOutputInfoHandle;
                ml_tensors_data_h mInputDataHandle;
                ml_tensors_data_h mOutputDataHandle;
                std::vector<std::string> mDesignated_inputs;