From: Sangjung Woo
Date: Mon, 26 Oct 2020 02:24:57 +0000 (+0900)
Subject: Fix memory leak issue
X-Git-Tag: submit/tizen/20201104.021236~1
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4cb1bc364fa7d6a5bce0346bc9642781e3e3be28;p=platform%2Fcore%2Fmultimedia%2Finference-engine-mlapi.git

Fix memory leak issue

* Apply ml_single_invoke_no_alloc() ML API instead of ml_single_invoke().
* Remove unnecessary memory copies.

Change-Id: I41c6eaf0afe35a4dd481ac57e942dd45f0fb1e4a
Signed-off-by: Sangjung Woo
Signed-off-by: Inki Dae
---

diff --git a/src/inference_engine_mlapi.cpp b/src/inference_engine_mlapi.cpp
index 7f10204..0a8e41f 100644
--- a/src/inference_engine_mlapi.cpp
+++ b/src/inference_engine_mlapi.cpp
@@ -24,6 +24,12 @@
 #include
 #include
 
+// TODO. Below is test code. DO NOT use ML internal function.
+#define ENABLE_NO_ALLOC
+#if defined(ENABLE_NO_ALLOC)
+extern "C" int ml_single_invoke_no_alloc(ml_single_h single, const ml_tensors_data_h input, ml_tensors_data_h output);
+#endif
+
 namespace InferenceEngineImpl
 {
 namespace MLAPIImpl
@@ -32,6 +38,8 @@ namespace MLAPIImpl
 			  mPluginType(),
 			  mTargetDevice(),
 			  mSingle(),
+			  mInputInfoHandle(),
+			  mOutputInfoHandle(),
 			  mInputDataHandle(),
 			  mOutputDataHandle(),
 			  mDesignated_inputs(),
@@ -54,12 +62,20 @@ namespace MLAPIImpl
 
 		ml_single_close(mSingle);
 
+		if (mInputInfoHandle)
+			ml_tensors_info_destroy(mInputInfoHandle);
+
+		if (mOutputInfoHandle)
+			ml_tensors_info_destroy(mOutputInfoHandle);
+
 		if (mInputDataHandle)
 			ml_tensors_data_destroy(mInputDataHandle);
 
 		if (mOutputDataHandle)
 			ml_tensors_data_destroy(mOutputDataHandle);
 
+		mInputInfoHandle = NULL;
+		mOutputInfoHandle = NULL;
 		mInputDataHandle = NULL;
 		mOutputDataHandle = NULL;
 	}
@@ -185,16 +201,22 @@ namespace MLAPIImpl
 		// TODO. create ml_tensor_info for input and output tensor and pass
 		// them as parameters of ml_single_open function.
 
-		int ret = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
+		int err = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
							nnfw_type, nnfw_hw);
-		if (ret != ML_ERROR_NONE) {
-			LOGE("Failed to request ml_single_open(%d).", ret);
+		if (err != ML_ERROR_NONE) {
+			LOGE("Failed to request ml_single_open(%d).", err);
 			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
 		}
 
+		err = UpdateTensorsInfo();
+		if (err != INFERENCE_ENGINE_ERROR_NONE) {
+			ml_single_close(mSingle);
+			mSingle = NULL;
+		}
+
 		LOGI("LEAVE");
 
-		return INFERENCE_ENGINE_ERROR_NONE;
+		return err;
 	}
 
 	int InferenceMLAPI::GetInputTensorBuffers(
 			std::vector<inference_engine_tensor_buffer> &buffers)
 	{
 		LOGI("ENTER");
 
-		buffers.clear();
-
 		// TODO. Implement this function according to a given ML Single API backend properly.
 
-		ml_tensors_info_h in_info = NULL;
-
-		int ret = ml_single_get_input_info(mSingle, &in_info);
-		if (ret != ML_ERROR_NONE) {
-			LOGE("Failed to request ml_single_get_input_info(%d).", ret);
-			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-		}
-
 		// ML Single API will always provide internal tensor buffers so
 		// get the tensor buffers back to Mediavision framework so that
 		// Mediavision framework doesn't allocate the tensor buffers internally.
 
+		buffers.clear();
+
+		int ret;
 		unsigned int cnt;
 
-		ret = ml_tensors_info_get_count(in_info, &cnt);
+		ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
 		if (ret != ML_ERROR_NONE) {
 			LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
 			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -228,15 +243,19 @@ namespace MLAPIImpl
 
 		LOGI("input tensor count = %u", cnt);
 
-		for (unsigned int i = 0; i < cnt; ++i) {
-			inference_engine_tensor_buffer in_buffer;
-			ml_tensor_type_e in_type;
-
-			ret = ml_tensors_data_create(in_info, &mInputDataHandle);
+		// TODO. Below is test code, should we allocate new buffer for every inference?
+		if (mInputDataHandle == NULL) {
+			ret = ml_tensors_data_create(mInputInfoHandle, &mInputDataHandle);
 			if (ret != ML_ERROR_NONE) {
 				LOGE("Failed to request ml_tensors_data_create(%d).", ret);
 				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
 			}
+		}
+
+		// TODO. Cache tensor info and reduce function call in UpdateTensorsInfo()
+		for (unsigned int i = 0; i < cnt; ++i) {
+			inference_engine_tensor_buffer in_buffer;
+			ml_tensor_type_e in_type;
 
 			ret = ml_tensors_data_get_tensor_data(mInputDataHandle, i, &in_buffer.buffer, &in_buffer.size);
 			if (ret != ML_ERROR_NONE) {
@@ -246,10 +265,9 @@ namespace MLAPIImpl
 
 			LOGE("buffer = %p, size = %d\n", in_buffer.buffer, in_buffer.size);
 
-			int ret = ml_tensors_info_get_tensor_type(in_info, i, &in_type);
+			ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, i, &in_type);
 			if (ret != ML_ERROR_NONE) {
-				LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
-					 ret);
+				LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
 				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
 			}
 
@@ -276,25 +294,18 @@ namespace MLAPIImpl
 	{
 		LOGI("ENTER");
 
-		buffers.clear();
-
 		// TODO. Need to check if model file loading is done.
 
-		ml_tensors_info_h out_info = NULL;
-
-		int ret = ml_single_get_output_info(mSingle, &out_info);
-		if (ret != ML_ERROR_NONE) {
-			LOGE("Failed to request ml_single_get_output_info(%d).", ret);
-			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-		}
-
 		// ML Single API will always provide internal tensor buffers so
 		// get the tensor buffers back to Mediavision framework so that
 		// Mediavision framework doesn't allocate the tensor buffers internally.
 
+		buffers.clear();
+
+		int ret;
 		unsigned int cnt;
 
-		ret = ml_tensors_info_get_count(out_info, &cnt);
+		ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
 		if (ret != ML_ERROR_NONE) {
 			LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
 			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -302,15 +313,19 @@ namespace MLAPIImpl
 
 		LOGI("output tensor count = %u", cnt);
 
-		for (unsigned int i = 0; i < cnt; ++i) {
-			inference_engine_tensor_buffer out_buffer;
-			ml_tensor_type_e out_type;
-
-			ret = ml_tensors_data_create(out_info, &mOutputDataHandle);
+		// TODO. Below is test code, should we allocate new buffer for every inference?
+		if (mOutputDataHandle == NULL) {
+			ret = ml_tensors_data_create(mOutputInfoHandle, &mOutputDataHandle);
 			if (ret != ML_ERROR_NONE) {
 				LOGE("Failed to request ml_tensors_data_create(%d).", ret);
 				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
 			}
+		}
+
+		// TODO. Cache tensor info and reduce function call in UpdateTensorsInfo()
+		for (unsigned int i = 0; i < cnt; ++i) {
+			inference_engine_tensor_buffer out_buffer;
+			ml_tensor_type_e out_type;
 
 			ret = ml_tensors_data_get_tensor_data(mOutputDataHandle, i, &out_buffer.buffer, &out_buffer.size);
 			if (ret != ML_ERROR_NONE) {
@@ -320,10 +335,9 @@ namespace MLAPIImpl
 
 			LOGE("buffer = %p, size = %d\n", out_buffer.buffer, out_buffer.size);
 
-			ret = ml_tensors_info_get_tensor_type(out_info, i, &out_type);
+			ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, i, &out_type);
 			if (ret != ML_ERROR_NONE) {
-				LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
-					 ret);
+				LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
 				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
 			}
 
@@ -350,18 +364,11 @@ namespace MLAPIImpl
 	{
 		LOGI("ENTER");
 
-		ml_tensors_info_h in_info = NULL;
-
 		// TODO. Need to check if model file loading is done.
-
-		int ret = ml_single_get_input_info(mSingle, &in_info);
-		if (ret != ML_ERROR_NONE) {
-			LOGE("Failed to request ml_single_get_input_info(%d).", ret);
-			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-		}
-
+		int ret;
 		unsigned int cnt;
-		ret = ml_tensors_info_get_count(in_info, &cnt);
+
+		ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
 		if (ret != ML_ERROR_NONE) {
 			LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
 			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -372,11 +379,11 @@ namespace MLAPIImpl
 		for (unsigned int i = 0; i < cnt; ++i) {
 			inference_engine_tensor_info tensor_info;
 			ml_tensor_type_e in_type;
-			unsigned int in_dim[ML_TENSOR_RANK_LIMIT];
+			ml_tensor_dimension in_dim;
 			char *in_name = NULL;
 			size_t in_size = 1;
 
-			ret = ml_tensors_info_get_tensor_type(in_info, i, &in_type);
+			ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, i, &in_type);
 			if (ret != ML_ERROR_NONE) {
 				LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
 					 ret);
@@ -390,7 +397,7 @@ namespace MLAPIImpl
 				return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
 			}
 
-			ret = ml_tensors_info_get_tensor_dimension(in_info, i, in_dim);
+			ret = ml_tensors_info_get_tensor_dimension(mInputInfoHandle, i, in_dim);
 			if (ret != ML_ERROR_NONE) {
 				LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
 					 ret);
@@ -406,7 +413,7 @@ namespace MLAPIImpl
 
 			LOGI("input tensor size = %zu", in_size);
 
-			ret = ml_tensors_info_get_tensor_name(in_info, i, &in_name);
+			ret = ml_tensors_info_get_tensor_name(mInputInfoHandle, i, &in_name);
 			if (ret != ML_ERROR_NONE) {
 				LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
 					 ret);
@@ -435,18 +442,11 @@ namespace MLAPIImpl
 	{
 		LOGI("ENTER");
 
-		ml_tensors_info_h out_info = NULL;
-
 		// TODO. Need to check if model file loading is done.
-
-		int ret = ml_single_get_output_info(mSingle, &out_info);
-		if (ret != ML_ERROR_NONE) {
-			LOGE("Failed to request ml_single_get_output_info(%d).", ret);
-			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
-		}
-
+		int ret;
 		unsigned int cnt;
-		ret = ml_tensors_info_get_count(out_info, &cnt);
+
+		ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
 		if (ret != ML_ERROR_NONE) {
 			LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
 			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
@@ -461,7 +461,7 @@ namespace MLAPIImpl
 			char *out_name = NULL;
 			size_t out_size = 1;
 
-			ret = ml_tensors_info_get_tensor_type(out_info, i, &out_type);
+			ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, i, &out_type);
 			if (ret != ML_ERROR_NONE) {
 				LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
 					 ret);
@@ -475,7 +475,7 @@ namespace MLAPIImpl
 				return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
 			}
 
-			ret = ml_tensors_info_get_tensor_dimension(out_info, i, out_dim);
+			ret = ml_tensors_info_get_tensor_dimension(mOutputInfoHandle, i, out_dim);
 			if (ret != ML_ERROR_NONE) {
 				LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
 					 ret);
@@ -506,7 +506,7 @@ namespace MLAPIImpl
 
 			LOGI("output tensor size = %zu", out_size);
 
-			ret = ml_tensors_info_get_tensor_name(out_info, i, &out_name);
+			ret = ml_tensors_info_get_tensor_name(mOutputInfoHandle, i, &out_name);
 			if (ret != ML_ERROR_NONE) {
 				LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
 					 ret);
@@ -547,7 +547,7 @@ namespace MLAPIImpl
 
 		// TODO. Request input property information to a given ML Single API of nnstreamer backend,
 		// and set it instead of user-given one,
-
+		// Call UpdateTensorsInfo() after requesting input info.
 		mDesignated_inputs = property.layer_names;
 		mInputProperty = property;
 
@@ -573,7 +573,7 @@ namespace MLAPIImpl
 
 		// TODO. Request output property information to a given ML Single API of nnstreamer backend,
 		// and set it instead of user-given one,
-
+		// Call UpdateTensorsInfo() after requesting output info.
 		mDesignated_outputs = property.layer_names;
 		mOutputProperty = property;
 
@@ -640,6 +640,41 @@ namespace MLAPIImpl
 		return -1;
 	}
 
+	int InferenceMLAPI::UpdateTensorsInfo()
+	{
+		LOGI("ENTER");
+
+		if (!mSingle) {
+			LOGE("Invalid state, single-shot handle is not initialized.");
+			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+		}
+
+		if (mInputInfoHandle) {
+			ml_tensors_info_destroy(mInputInfoHandle);
+			mInputInfoHandle = NULL;
+		}
+
+		if (mOutputInfoHandle) {
+			ml_tensors_info_destroy(mOutputInfoHandle);
+			mOutputInfoHandle = NULL;
+		}
+
+		int ret = ml_single_get_input_info(mSingle, &mInputInfoHandle);
+		if (ret != ML_ERROR_NONE) {
+			LOGE("Failed to request ml_single_get_input_info(%d).", ret);
+			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+		}
+
+		ret = ml_single_get_output_info(mSingle, &mOutputInfoHandle);
+		if (ret != ML_ERROR_NONE) {
+			LOGE("Failed to request ml_single_get_output_info(%d).", ret);
+			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+		}
+
+		LOGI("LEAVE");
+		return INFERENCE_ENGINE_ERROR_NONE;
+	}
+
 	int InferenceMLAPI::Run(
 			std::vector<inference_engine_tensor_buffer> &input_buffers,
 			std::vector<inference_engine_tensor_buffer> &output_buffers)
@@ -652,46 +687,46 @@ namespace MLAPIImpl
 			return err;
 		}
 
-		err = ml_single_invoke(mSingle, mInputDataHandle, &mOutputDataHandle);
+#if defined(ENABLE_NO_ALLOC)
+		err = ml_single_invoke_no_alloc(mSingle, mInputDataHandle, mOutputDataHandle);
 		if (err != ML_ERROR_NONE) {
-			LOGE("Failed to request ml_single_invoke(%d).", err);
+			LOGE("Failed to request ml_single_invoke_no_alloc(%d).", err);
 			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
 		}
+#else
+		ml_tensors_data_h out_data = NULL;
+		void *data_ptr;
+		size_t data_size;
+		unsigned int out_cnt;
 
-		ml_tensors_info_h out_info = NULL;
-
-		err = ml_single_get_output_info(mSingle, &out_info);
+		err = ml_tensors_info_get_count(mOutputInfoHandle, &out_cnt);
 		if (err != ML_ERROR_NONE) {
-			LOGE("Failed to request ml_single_get_output_info(%d).", err);
+			LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
 			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
 		}
 
-		unsigned int out_cnt;
-
-		err = ml_tensors_info_get_count(out_info, &out_cnt);
+		// Be carefull, ml_single_invoke() returns newly allocated output handle.
+		err = ml_single_invoke(mSingle, mInputDataHandle, &out_data);
 		if (err != ML_ERROR_NONE) {
-			LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
+			LOGE("Failed to request ml_single_invoke(%d).", err);
 			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
 		}
 
-		// TODO. Why below code is required?
-		// ML Single API provides internal tensor buffer for output tensor
-		// and user alreadys know the buffer by GetOutputTensorBuffers.
-		//
-		// However, without below code, user cannot get the output result
-		// correctly. What happens in ML Single API framework?
 		for (unsigned int i = 0; i < out_cnt; ++i) {
-			err = ml_tensors_data_get_tensor_data(
-					mOutputDataHandle, i, (void **) &output_buffers[i].buffer,
-					&output_buffers[i].size);
+			err = ml_tensors_data_get_tensor_data(out_data, i, &data_ptr, &data_size);
 			if (err != ML_ERROR_NONE) {
 				LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", err);
+				ml_tensors_data_destroy(out_data);
 				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
 			}
+			// TODO. Remove memcpy() using ml_single_invoke_fill() later.
+			memcpy(output_buffers[i].buffer, data_ptr, output_buffers[i].size);
 
 			LOGI("Output tensor[%u] = %zu", i, output_buffers[i].size);
 		}
 
+		ml_tensors_data_destroy(out_data);
+#endif
 		LOGI("LEAVE");
 
 		return INFERENCE_ENGINE_ERROR_NONE;
diff --git a/src/inference_engine_mlapi_private.h b/src/inference_engine_mlapi_private.h
index b6b4b1e..b34cfc8 100644
--- a/src/inference_engine_mlapi_private.h
+++ b/src/inference_engine_mlapi_private.h
@@ -77,10 +77,13 @@ namespace MLAPIImpl
 				std::vector<inference_engine_tensor_buffer> &input_buffers,
 				std::vector<inference_engine_tensor_buffer> &output_buffers);
 		int ConvertTensorType(int tensor_type);
+		int UpdateTensorsInfo();
 
 		int mPluginType;
 		int mTargetDevice;
 		ml_single_h mSingle;
+		ml_tensors_info_h mInputInfoHandle;
+		ml_tensors_info_h mOutputInfoHandle;
 		ml_tensors_data_h mInputDataHandle;
 		ml_tensors_data_h mOutputDataHandle;
 		std::vector<std::string> mDesignated_inputs;
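For readers not familiar with the NNStreamer Single-shot C API, the sketch below (not part of the commit) illustrates the invocation pattern this patch moves to. ml_single_invoke_no_alloc() is an NNStreamer-internal symbol, not public API; its declaration is copied from the patch above. Header names and the helper function names (prepare_output, run_once) are illustrative assumptions, and error handling is reduced to the essentials.

    // Sketch only: contrast between the allocating and the pre-allocated
    // invocation paths of the ML Single-shot API.
    #include <nnstreamer.h>          // ml_tensors_info_h, ml_tensors_data_h
    #include <nnstreamer-single.h>   // ml_single_h, ml_single_invoke()

    // Internal NNStreamer symbol, declared exactly as in the patch above.
    extern "C" int ml_single_invoke_no_alloc(ml_single_h single,
                                             const ml_tensors_data_h input,
                                             ml_tensors_data_h output);

    // One-time setup, mirroring UpdateTensorsInfo()/GetOutputTensorBuffers():
    // cache the output tensors info and pre-allocate one output data handle.
    static int prepare_output(ml_single_h single, ml_tensors_info_h *out_info,
                              ml_tensors_data_h *out_data)
    {
            int err = ml_single_get_output_info(single, out_info);
            if (err != ML_ERROR_NONE)
                    return err;
            return ml_tensors_data_create(*out_info, out_data);
    }

    // Per-inference call. The old path, ml_single_invoke(single, in, &out),
    // allocates a fresh output handle on every call; if the caller does not
    // release it with ml_tensors_data_destroy() it leaks, and the result must
    // still be copied into the engine's own buffers. With a pre-allocated
    // handle the result is written directly into the buffers the engine has
    // already exposed, so there is nothing extra to free or copy per call.
    static int run_once(ml_single_h single, ml_tensors_data_h in_data,
                        ml_tensors_data_h out_data)
    {
            return ml_single_invoke_no_alloc(single, in_data, out_data);
    }

The cached info and data handles are released once, with ml_tensors_info_destroy() and ml_tensors_data_destroy(), which is what the destructor hunk above adds.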