From 775d4ec34c0b95a6a58e697857a640f2ab6246aa Mon Sep 17 00:00:00 2001
From: Sangjung Woo
Date: Mon, 26 Oct 2020 11:24:57 +0900
Subject: [PATCH] WIP: Performance improvement

* Apply the ml_single_invoke_no_alloc() ML API instead of ml_single_invoke().
* Remove unnecessary memory copies.

Signed-off-by: Sangjung Woo
---
 src/inference_engine_mlapi.cpp       | 215 ++++++++++++++++++++----------------
 src/inference_engine_mlapi_private.h |   3 +
 2 files changed, 128 insertions(+), 90 deletions(-)

diff --git a/src/inference_engine_mlapi.cpp b/src/inference_engine_mlapi.cpp
index 2e3c0a2..706ec29 100644
--- a/src/inference_engine_mlapi.cpp
+++ b/src/inference_engine_mlapi.cpp
@@ -23,6 +23,12 @@
 #include
 #include
 
+// TODO. Below is test code. DO NOT use ML internal functions.
+#define ENABLE_NO_ALLOC
+#if defined(ENABLE_NO_ALLOC)
+extern "C" int ml_single_invoke_no_alloc(ml_single_h single, const ml_tensors_data_h input, ml_tensors_data_h output);
+#endif
+
 namespace InferenceEngineImpl
 {
 namespace MLAPIImpl
@@ -31,6 +37,8 @@ namespace MLAPIImpl
              mPluginType(),
              mTargetDevice(),
              mSingle(),
+             mInputInfoHandle(),
+             mOutputInfoHandle(),
              mInputDataHandle(),
              mOutputDataHandle(),
              mDesignated_inputs(),
@@ -53,12 +61,20 @@
        ml_single_close(mSingle);
 
+        if (mInputInfoHandle)
+            ml_tensors_info_destroy(mInputInfoHandle);
+
+        if (mOutputInfoHandle)
+            ml_tensors_info_destroy(mOutputInfoHandle);
+
         if (mInputDataHandle)
             ml_tensors_data_destroy(mInputDataHandle);
 
         if (mOutputDataHandle)
             ml_tensors_data_destroy(mOutputDataHandle);
 
+        mInputInfoHandle = NULL;
+        mOutputInfoHandle = NULL;
         mInputDataHandle = NULL;
         mOutputDataHandle = NULL;
     }
@@ -192,16 +208,22 @@
         // TODO. create ml_tensor_info for input and output tensor and pass
         // them as parameters of ml_single_open function.
 
-        int ret = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
+        int err = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
                                  nnfw_type, nnfw_hw);
-        if (ret != ML_ERROR_NONE) {
-            LOGE("Failed to request ml_single_open(%d).", ret);
+        if (err != ML_ERROR_NONE) {
+            LOGE("Failed to request ml_single_open(%d).", err);
             return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
         }
 
+        err = UpdateTensorsInfo();
+        if (err != INFERENCE_ENGINE_ERROR_NONE) {
+            ml_single_close(mSingle);
+            mSingle = NULL;
+        }
+
         LOGI("LEAVE");
 
-        return INFERENCE_ENGINE_ERROR_NONE;
+        return err;
     }
 
     int InferenceMLAPI::GetInputTensorBuffers(
             std::vector<inference_engine_tensor_buffer> &buffers)
     {
         LOGI("ENTER");
 
-        buffers.clear();
-
         // TODO. Implement this function according to a given ML Single API backend properly.
 
-        ml_tensors_info_h in_info = NULL;
-
-        int ret = ml_single_get_input_info(mSingle, &in_info);
-        if (ret != ML_ERROR_NONE) {
-            LOGE("Failed to request ml_single_get_input_info(%d).", ret);
-            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-        }
-
         // ML Single API will always provide internal tensor buffers so
         // get the tensor buffers back to Mediavision framework so that
         // Mediavision framework doesn't allocate the tensor buffers internally.
 
+        buffers.clear();
+
+        int ret;
         unsigned int cnt;
 
-        ret = ml_tensors_info_get_count(in_info, &cnt);
+        ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
         if (ret != ML_ERROR_NONE) {
             LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
             return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -235,15 +250,19 @@
         LOGI("input tensor count = %u", cnt);
 
-        for (unsigned int i = 0; i < cnt; ++i) {
-            inference_engine_tensor_buffer in_buffer;
-            ml_tensor_type_e in_type;
-
-            ret = ml_tensors_data_create(in_info, &mInputDataHandle);
+        // TODO. Below is test code; should we allocate a new buffer for every inference?
+        if (mInputDataHandle == NULL) {
+            ret = ml_tensors_data_create(mInputInfoHandle, &mInputDataHandle);
             if (ret != ML_ERROR_NONE) {
                 LOGE("Failed to request ml_tensors_data_create(%d).", ret);
                 return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
             }
+        }
+
+        // TODO. Cache tensor info and reduce function calls in UpdateTensorsInfo().
+        for (unsigned int i = 0; i < cnt; ++i) {
+            inference_engine_tensor_buffer in_buffer;
+            ml_tensor_type_e in_type;
 
             ret = ml_tensors_data_get_tensor_data(mInputDataHandle, i, &in_buffer.buffer, &in_buffer.size);
             if (ret != ML_ERROR_NONE) {
@@ -253,10 +272,9 @@
             LOGE("buffer = %p, size = %d\n", in_buffer.buffer, in_buffer.size);
 
-            int ret = ml_tensors_info_get_tensor_type(in_info, i, &in_type);
+            ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, i, &in_type);
             if (ret != ML_ERROR_NONE) {
-                LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
-                     ret);
+                LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
                 return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
             }
 
@@ -283,25 +301,18 @@
     {
         LOGI("ENTER");
 
-        buffers.clear();
-
         // TODO. Need to check if model file loading is done.
 
-        ml_tensors_info_h out_info = NULL;
-
-        int ret = ml_single_get_output_info(mSingle, &out_info);
-        if (ret != ML_ERROR_NONE) {
-            LOGE("Failed to request ml_single_get_output_info(%d).", ret);
-            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-        }
-
         // ML Single API will always provide internal tensor buffers so
         // get the tensor buffers back to Mediavision framework so that
         // Mediavision framework doesn't allocate the tensor buffers internally.
 
+        buffers.clear();
+
+        int ret;
         unsigned int cnt;
 
-        ret = ml_tensors_info_get_count(out_info, &cnt);
+        ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
         if (ret != ML_ERROR_NONE) {
             LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
             return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -309,15 +320,19 @@
         LOGI("output tensor count = %u", cnt);
 
-        for (unsigned int i = 0; i < cnt; ++i) {
-            inference_engine_tensor_buffer out_buffer;
-            ml_tensor_type_e out_type;
-
-            ret = ml_tensors_data_create(out_info, &mOutputDataHandle);
+        // TODO. Below is test code; should we allocate a new buffer for every inference?
+        if (mOutputDataHandle == NULL) {
+            ret = ml_tensors_data_create(mOutputInfoHandle, &mOutputDataHandle);
             if (ret != ML_ERROR_NONE) {
                 LOGE("Failed to request ml_tensors_data_create(%d).", ret);
                 return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
             }
+        }
+
+        // TODO. Cache tensor info and reduce function calls in UpdateTensorsInfo().
+        for (unsigned int i = 0; i < cnt; ++i) {
+            inference_engine_tensor_buffer out_buffer;
+            ml_tensor_type_e out_type;
 
             ret = ml_tensors_data_get_tensor_data(mOutputDataHandle, i, &out_buffer.buffer, &out_buffer.size);
             if (ret != ML_ERROR_NONE) {
@@ -327,10 +342,9 @@
             LOGE("buffer = %p, size = %d\n", out_buffer.buffer, out_buffer.size);
 
-            ret = ml_tensors_info_get_tensor_type(out_info, i, &out_type);
+            ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, i, &out_type);
             if (ret != ML_ERROR_NONE) {
-                LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
-                     ret);
+                LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
                 return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
             }
 
@@ -357,18 +371,11 @@
     {
         LOGI("ENTER");
 
-        ml_tensors_info_h in_info = NULL;
-
         // TODO. Need to check if model file loading is done.
-
-        int ret = ml_single_get_input_info(mSingle, &in_info);
-        if (ret != ML_ERROR_NONE) {
-            LOGE("Failed to request ml_single_get_input_info(%d).", ret);
-            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-        }
-
+        int ret;
         unsigned int cnt;
-        ret = ml_tensors_info_get_count(in_info, &cnt);
+
+        ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
         if (ret != ML_ERROR_NONE) {
             LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
             return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -379,11 +386,11 @@
         for (unsigned int i = 0; i < cnt; ++i) {
             inference_engine_tensor_info tensor_info;
             ml_tensor_type_e in_type;
-            unsigned int in_dim[ML_TENSOR_RANK_LIMIT];
+            ml_tensor_dimension in_dim;
             char *in_name = NULL;
             size_t in_size = 1;
 
-            ret = ml_tensors_info_get_tensor_type(in_info, i, &in_type);
+            ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, i, &in_type);
             if (ret != ML_ERROR_NONE) {
                 LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
@@ -397,7 +404,7 @@
                 return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
             }
 
-            ret = ml_tensors_info_get_tensor_dimension(in_info, i, in_dim);
+            ret = ml_tensors_info_get_tensor_dimension(mInputInfoHandle, i, in_dim);
             if (ret != ML_ERROR_NONE) {
                 LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).", ret);
@@ -413,7 +420,7 @@
             LOGI("input tensor size = %zu", in_size);
 
-            ret = ml_tensors_info_get_tensor_name(in_info, i, &in_name);
+            ret = ml_tensors_info_get_tensor_name(mInputInfoHandle, i, &in_name);
             if (ret != ML_ERROR_NONE) {
                 LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).", ret);
@@ -442,18 +449,11 @@
     {
         LOGI("ENTER");
 
-        ml_tensors_info_h out_info = NULL;
-
         // TODO. Need to check if model file loading is done.
-
-        int ret = ml_single_get_output_info(mSingle, &out_info);
-        if (ret != ML_ERROR_NONE) {
-            LOGE("Failed to request ml_single_get_output_info(%d).", ret);
-            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-        }
-
+        int ret;
         unsigned int cnt;
-        ret = ml_tensors_info_get_count(out_info, &cnt);
+
+        ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
         if (ret != ML_ERROR_NONE) {
             LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
             return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -468,7 +468,7 @@
             char *out_name = NULL;
             size_t out_size = 1;
 
-            ret = ml_tensors_info_get_tensor_type(out_info, i, &out_type);
+            ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, i, &out_type);
             if (ret != ML_ERROR_NONE) {
                 LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
@@ -482,7 +482,7 @@
                 return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
             }
 
-            ret = ml_tensors_info_get_tensor_dimension(out_info, i, out_dim);
+            ret = ml_tensors_info_get_tensor_dimension(mOutputInfoHandle, i, out_dim);
             if (ret != ML_ERROR_NONE) {
                 LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).", ret);
@@ -513,7 +513,7 @@
             LOGI("output tensor size = %zu", out_size);
 
-            ret = ml_tensors_info_get_tensor_name(out_info, i, &out_name);
+            ret = ml_tensors_info_get_tensor_name(mOutputInfoHandle, i, &out_name);
             if (ret != ML_ERROR_NONE) {
                 LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).", ret);
@@ -554,7 +554,7 @@
         // TODO. Request input property information to a given ML Single API of nnstreamer backend,
         // and set it instead of user-given one,
-
+        // Call UpdateTensorsInfo() after requesting input info.
         mDesignated_inputs = property.layer_names;
         mInputProperty = property;
 
@@ -580,7 +580,7 @@
         // TODO. Request output property information to a given ML Single API of nnstreamer backend,
         // and set it instead of user-given one,
-
+        // Call UpdateTensorsInfo() after requesting output info.
         mDesignated_outputs = property.layer_names;
         mOutputProperty = property;
 
@@ -647,6 +647,41 @@
         return -1;
     }
 
+    int InferenceMLAPI::UpdateTensorsInfo()
+    {
+        LOGI("ENTER");
+
+        if (!mSingle) {
+            LOGE("Invalid state, single-shot handle is not initialized.");
+            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+        }
+
+        if (mInputInfoHandle) {
+            ml_tensors_info_destroy(mInputInfoHandle);
+            mInputInfoHandle = NULL;
+        }
+
+        if (mOutputInfoHandle) {
+            ml_tensors_info_destroy(mOutputInfoHandle);
+            mOutputInfoHandle = NULL;
+        }
+
+        int ret = ml_single_get_input_info(mSingle, &mInputInfoHandle);
+        if (ret != ML_ERROR_NONE) {
+            LOGE("Failed to request ml_single_get_input_info(%d).", ret);
+            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+        }
+
+        ret = ml_single_get_output_info(mSingle, &mOutputInfoHandle);
+        if (ret != ML_ERROR_NONE) {
+            LOGE("Failed to request ml_single_get_output_info(%d).", ret);
+            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+        }
+
+        LOGI("LEAVE");
+        return INFERENCE_ENGINE_ERROR_NONE;
+    }
+
     int InferenceMLAPI::Run(
             std::vector<inference_engine_tensor_buffer> &input_buffers,
             std::vector<inference_engine_tensor_buffer> &output_buffers)
@@ -659,46 +694,46 @@
             return err;
         }
 
-        err = ml_single_invoke(mSingle, mInputDataHandle, &mOutputDataHandle);
+#if defined(ENABLE_NO_ALLOC)
+        err = ml_single_invoke_no_alloc(mSingle, mInputDataHandle, mOutputDataHandle);
         if (err != ML_ERROR_NONE) {
-            LOGE("Failed to request ml_single_invoke(%d).", err);
+            LOGE("Failed to request ml_single_invoke_no_alloc(%d).", err);
             return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
         }
+#else
+        ml_tensors_data_h out_data = NULL;
+        void *data_ptr;
+        size_t data_size;
+        unsigned int out_cnt;
 
-        ml_tensors_info_h out_info = NULL;
-
-        err = ml_single_get_output_info(mSingle, &out_info);
+        err = ml_tensors_info_get_count(mOutputInfoHandle, &out_cnt);
         if (err != ML_ERROR_NONE) {
-            LOGE("Failed to request ml_single_get_output_info(%d).", err);
+            LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
             return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
         }
 
-        unsigned int out_cnt;
-
-        err = ml_tensors_info_get_count(out_info, &out_cnt);
+        // Be careful, ml_single_invoke() returns a newly allocated output handle.
+        err = ml_single_invoke(mSingle, mInputDataHandle, &out_data);
         if (err != ML_ERROR_NONE) {
-            LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
+            LOGE("Failed to request ml_single_invoke(%d).", err);
             return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
         }
 
-        // TODO. Why below code is required?
-        // ML Single API provides internal tensor buffer for output tensor
-        // and user alreadys know the buffer by GetOutputTensorBuffers.
-        //
-        // However, without below code, user cannot get the output result
-        // correctly. What happens in ML Single API framework?
         for (unsigned int i = 0; i < out_cnt; ++i) {
-            err = ml_tensors_data_get_tensor_data(
-                    mOutputDataHandle, i, (void **) &output_buffers[i].buffer,
-                    &output_buffers[i].size);
+            err = ml_tensors_data_get_tensor_data(out_data, i, &data_ptr, &data_size);
             if (err != ML_ERROR_NONE) {
                 LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", err);
+                ml_tensors_data_destroy(out_data);
                 return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
             }
 
+            // TODO. Remove memcpy() using ml_single_invoke_fill() later.
+            memcpy(output_buffers[i].buffer, data_ptr, output_buffers[i].size);
             LOGI("Output tensor[%u] = %zu", i, output_buffers[i].size);
         }
 
+        ml_tensors_data_destroy(out_data);
+#endif
         LOGI("LEAVE");
 
         return INFERENCE_ENGINE_ERROR_NONE;
diff --git a/src/inference_engine_mlapi_private.h b/src/inference_engine_mlapi_private.h
index b6b4b1e..b34cfc8 100644
--- a/src/inference_engine_mlapi_private.h
+++ b/src/inference_engine_mlapi_private.h
@@ -77,10 +77,13 @@
             std::vector<inference_engine_tensor_buffer> &input_buffers,
             std::vector<inference_engine_tensor_buffer> &output_buffers);
 
         int ConvertTensorType(int tensor_type);
+        int UpdateTensorsInfo();
 
         int mPluginType;
         int mTargetDevice;
         ml_single_h mSingle;
+        ml_tensors_info_h mInputInfoHandle;
+        ml_tensors_info_h mOutputInfoHandle;
         ml_tensors_data_h mInputDataHandle;
         ml_tensors_data_h mOutputDataHandle;
         std::vector<std::string> mDesignated_inputs;
-- 
2.7.4
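A minimal sketch of the two invocation paths this patch switches between with ENABLE_NO_ALLOC, for illustration only. The helper name invoke_once(), its dst/dst_size parameters and the <nnstreamer-single.h> header name are assumptions; ml_single_invoke(), ml_tensors_data_get_tensor_data(), ml_tensors_data_destroy() and the ml_single_invoke_no_alloc() prototype are taken directly from the patch above.

// Sketch, not part of the patch. Assumes the input/output ml_tensors_data_h
// handles were created once from the cached ml_tensors_info_h handles
// (see UpdateTensorsInfo() and ml_tensors_data_create() above).
#include <string.h>              // memcpy()
#include <nnstreamer-single.h>   // ML Single API (assumed header name)

// Internal symbol declared the same way as in the patch; not a public API.
extern "C" int ml_single_invoke_no_alloc(ml_single_h single,
                                         const ml_tensors_data_h input,
                                         ml_tensors_data_h output);

static int invoke_once(ml_single_h single,
                       ml_tensors_data_h in_data,   // pre-filled input tensors
                       ml_tensors_data_h out_data,  // pre-allocated output tensors
                       void *dst, size_t dst_size)  // caller-visible output buffer
{
#if defined(ENABLE_NO_ALLOC)
    // Fast path: results land directly in the pre-allocated out_data handle,
    // so there is no per-inference allocation and no memcpy(); the caller
    // already sees out_data's internal buffer via GetOutputTensorBuffers().
    (void) dst;
    (void) dst_size;
    return ml_single_invoke_no_alloc(single, in_data, out_data);
#else
    // Fallback path: ml_single_invoke() allocates a fresh output handle on
    // every call, so the result has to be copied out and the handle destroyed.
    ml_tensors_data_h new_out = NULL;
    int err = ml_single_invoke(single, in_data, &new_out);
    if (err != ML_ERROR_NONE)
        return err;

    void *ptr = NULL;
    size_t size = 0;
    err = ml_tensors_data_get_tensor_data(new_out, 0, &ptr, &size);
    if (err == ML_ERROR_NONE && size <= dst_size)
        memcpy(dst, ptr, size);

    ml_tensors_data_destroy(new_out);
    return err;
#endif
}

The difference between the two branches is the whole point of the patch: the fallback path pays one allocation, one copy and one destroy per inference, while the no-alloc path reuses the handles created in GetInputTensorBuffers()/GetOutputTensorBuffers().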