#include <time.h>
#include <queue>
+#include <cstring> // for memcpy() added in Run(); may be redundant if already included elsewhere
+// TODO. Below is test code. DO NOT use the ML internal function.
+#define ENABLE_NO_ALLOC
+#if defined(ENABLE_NO_ALLOC)
+extern "C" int ml_single_invoke_no_alloc(ml_single_h single, const ml_tensors_data_h input, ml_tensors_data_h output);
+#endif
+
namespace InferenceEngineImpl
{
namespace MLAPIImpl
mPluginType(),
mTargetDevice(),
mSingle(),
+ mInputInfoHandle(),
+ mOutputInfoHandle(),
mInputDataHandle(),
mOutputDataHandle(),
mDesignated_inputs(),
ml_single_close(mSingle);
+ if (mInputInfoHandle)
+ ml_tensors_info_destroy(mInputInfoHandle);
+
+ if (mOutputInfoHandle)
+ ml_tensors_info_destroy(mOutputInfoHandle);
+
if (mInputDataHandle)
ml_tensors_data_destroy(mInputDataHandle);
if (mOutputDataHandle)
ml_tensors_data_destroy(mOutputDataHandle);
+ mInputInfoHandle = NULL;
+ mOutputInfoHandle = NULL;
mInputDataHandle = NULL;
mOutputDataHandle = NULL;
}
// TODO. create ml_tensor_info for input and output tensor and pass
// them as parameters of ml_single_open function.
- int ret = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
+ int err = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
nnfw_type, nnfw_hw);
- if (ret != ML_ERROR_NONE) {
- LOGE("Failed to request ml_single_open(%d).", ret);
+ if (err != ML_ERROR_NONE) {
+ LOGE("Failed to request ml_single_open(%d).", err);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
}
+ err = UpdateTensorsInfo();
+ if (err != INFERENCE_ENGINE_ERROR_NONE) {
+ ml_single_close(mSingle);
+ mSingle = NULL;
+ }
+
LOGI("LEAVE");
- return INFERENCE_ENGINE_ERROR_NONE;
+ return err;
}
int InferenceMLAPI::GetInputTensorBuffers(
{
LOGI("ENTER");
- buffers.clear();
-
// TODO. Implement this function according to a given ML Single API backend properly.
- ml_tensors_info_h in_info = NULL;
-
- int ret = ml_single_get_input_info(mSingle, &in_info);
- if (ret != ML_ERROR_NONE) {
- LOGE("Failed to request ml_single_get_input_info(%d).", ret);
- return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
- }
-
// ML Single API will always provide internal tensor buffers so
// get the tensor buffers back to Mediavision framework so that
// Mediavision framework doesn't allocate the tensor buffers internally.
+ buffers.clear();
+
+ int ret;
unsigned int cnt;
- ret = ml_tensors_info_get_count(in_info, &cnt);
+ ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
if (ret != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
LOGI("input tensor count = %u", cnt);
- for (unsigned int i = 0; i < cnt; ++i) {
- inference_engine_tensor_buffer in_buffer;
- ml_tensor_type_e in_type;
-
- ret = ml_tensors_data_create(in_info, &mInputDataHandle);
+ // TODO. Below is test code. Should we allocate a new buffer for every inference?
+ if (mInputDataHandle == NULL) {
+ ret = ml_tensors_data_create(mInputInfoHandle, &mInputDataHandle);
if (ret != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_data_create(%d).", ret);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
}
+ }
+
+ // TODO. Cache the tensor info in UpdateTensorsInfo() and reduce the function calls made here.
+ for (unsigned int i = 0; i < cnt; ++i) {
+ inference_engine_tensor_buffer in_buffer;
+ ml_tensor_type_e in_type;
ret = ml_tensors_data_get_tensor_data(mInputDataHandle, i, &in_buffer.buffer, &in_buffer.size);
if (ret != ML_ERROR_NONE) {
LOGE("buffer = %p, size = %d\n", in_buffer.buffer, in_buffer.size);
- int ret = ml_tensors_info_get_tensor_type(in_info, i, &in_type);
+ ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, i, &in_type);
if (ret != ML_ERROR_NONE) {
- LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
- ret);
+ LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
}
{
LOGI("ENTER");
- buffers.clear();
-
// TODO. Need to check if model file loading is done.
- ml_tensors_info_h out_info = NULL;
-
- int ret = ml_single_get_output_info(mSingle, &out_info);
- if (ret != ML_ERROR_NONE) {
- LOGE("Failed to request ml_single_get_output_info(%d).", ret);
- return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
- }
-
// ML Single API will always provide internal tensor buffers so
// get the tensor buffers back to Mediavision framework so that
// Mediavision framework doesn't allocate the tensor buffers internally.
+ buffers.clear();
+
+ int ret;
unsigned int cnt;
- ret = ml_tensors_info_get_count(out_info, &cnt);
+ ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
if (ret != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
LOGI("output tensor count = %u", cnt);
- for (unsigned int i = 0; i < cnt; ++i) {
- inference_engine_tensor_buffer out_buffer;
- ml_tensor_type_e out_type;
-
- ret = ml_tensors_data_create(out_info, &mOutputDataHandle);
+ // TODO. Below is test code. Should we allocate a new buffer for every inference?
+ if (mOutputDataHandle == NULL) {
+ ret = ml_tensors_data_create(mOutputInfoHandle, &mOutputDataHandle);
if (ret != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_data_create(%d).", ret);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
}
+ }
+
+ // TODO. Cache the tensor info in UpdateTensorsInfo() and reduce the function calls made here.
+ for (unsigned int i = 0; i < cnt; ++i) {
+ inference_engine_tensor_buffer out_buffer;
+ ml_tensor_type_e out_type;
ret = ml_tensors_data_get_tensor_data(mOutputDataHandle, i, &out_buffer.buffer, &out_buffer.size);
if (ret != ML_ERROR_NONE) {
LOGE("buffer = %p, size = %d\n", out_buffer.buffer, out_buffer.size);
- ret = ml_tensors_info_get_tensor_type(out_info, i, &out_type);
+ ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, i, &out_type);
if (ret != ML_ERROR_NONE) {
- LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
- ret);
+ LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
}
{
LOGI("ENTER");
- ml_tensors_info_h in_info = NULL;
-
// TODO. Need to check if model file loading is done.
-
- int ret = ml_single_get_input_info(mSingle, &in_info);
- if (ret != ML_ERROR_NONE) {
- LOGE("Failed to request ml_single_get_input_info(%d).", ret);
- return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
- }
-
+ int ret;
unsigned int cnt;
- ret = ml_tensors_info_get_count(in_info, &cnt);
+
+ ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
if (ret != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
for (unsigned int i = 0; i < cnt; ++i) {
inference_engine_tensor_info tensor_info;
ml_tensor_type_e in_type;
- unsigned int in_dim[ML_TENSOR_RANK_LIMIT];
+ ml_tensor_dimension in_dim;
char *in_name = NULL;
size_t in_size = 1;
- ret = ml_tensors_info_get_tensor_type(in_info, i, &in_type);
+ ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, i, &in_type);
if (ret != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
ret);
return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
}
- ret = ml_tensors_info_get_tensor_dimension(in_info, i, in_dim);
+ ret = ml_tensors_info_get_tensor_dimension(mInputInfoHandle, i, in_dim);
if (ret != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
ret);
LOGI("input tensor size = %zu", in_size);
- ret = ml_tensors_info_get_tensor_name(in_info, i, &in_name);
+ ret = ml_tensors_info_get_tensor_name(mInputInfoHandle, i, &in_name);
if (ret != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
ret);
{
LOGI("ENTER");
- ml_tensors_info_h out_info = NULL;
-
// TODO. Need to check if model file loading is done.
-
- int ret = ml_single_get_output_info(mSingle, &out_info);
- if (ret != ML_ERROR_NONE) {
- LOGE("Failed to request ml_single_get_output_info(%d).", ret);
- return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
- }
-
+ int ret;
unsigned int cnt;
- ret = ml_tensors_info_get_count(out_info, &cnt);
+
+ ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
if (ret != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
char *out_name = NULL;
size_t out_size = 1;
- ret = ml_tensors_info_get_tensor_type(out_info, i, &out_type);
+ ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, i, &out_type);
if (ret != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
ret);
return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
}
- ret = ml_tensors_info_get_tensor_dimension(out_info, i, out_dim);
+ ret = ml_tensors_info_get_tensor_dimension(mOutputInfoHandle, i, out_dim);
if (ret != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
ret);
LOGI("output tensor size = %zu", out_size);
- ret = ml_tensors_info_get_tensor_name(out_info, i, &out_name);
+ ret = ml_tensors_info_get_tensor_name(mOutputInfoHandle, i, &out_name);
if (ret != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
ret);
// TODO. Request input property information to a given ML Single API of nnstreamer backend,
// and set it instead of user-given one,
-
+ // then call UpdateTensorsInfo() so the cached input tensor info is refreshed.
mDesignated_inputs = property.layer_names;
mInputProperty = property;
// TODO. Request output property information to a given ML Single API of nnstreamer backend,
// and set it instead of user-given one,
-
+ // then call UpdateTensorsInfo() so the cached output tensor info is refreshed.
mDesignated_outputs = property.layer_names;
mOutputProperty = property;
return -1;
}
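+ // Refresh the cached tensor info handles (mInputInfoHandle and mOutputInfoHandle)
+ // from the single-shot handle. Called right after ml_single_open() succeeds so
+ // later calls can reuse the handles instead of querying
+ // ml_single_get_input_info()/ml_single_get_output_info() every time.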
+ int InferenceMLAPI::UpdateTensorsInfo()
+ {
+ LOGI("ENTER");
+
+ if (!mSingle) {
+ LOGE("Invalid state, single-shot handle is not initialized.");
+ return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+ }
+
+ if (mInputInfoHandle) {
+ ml_tensors_info_destroy(mInputInfoHandle);
+ mInputInfoHandle = NULL;
+ }
+
+ if (mOutputInfoHandle) {
+ ml_tensors_info_destroy(mOutputInfoHandle);
+ mOutputInfoHandle = NULL;
+ }
+
+ int ret = ml_single_get_input_info(mSingle, &mInputInfoHandle);
+ if (ret != ML_ERROR_NONE) {
+ LOGE("Failed to request ml_single_get_input_info(%d).", ret);
+ return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+ }
+
+ ret = ml_single_get_output_info(mSingle, &mOutputInfoHandle);
+ if (ret != ML_ERROR_NONE) {
+ LOGE("Failed to request ml_single_get_output_info(%d).", ret);
+ return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
+ }
+
+ LOGI("LEAVE");
+ return INFERENCE_ENGINE_ERROR_NONE;
+ }
+
int InferenceMLAPI::Run(
std::vector<inference_engine_tensor_buffer> &input_buffers,
std::vector<inference_engine_tensor_buffer> &output_buffers)
return err;
}
- err = ml_single_invoke(mSingle, mInputDataHandle, &mOutputDataHandle);
+#if defined(ENABLE_NO_ALLOC)
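+ // Test-only path: pass the pre-allocated mOutputDataHandle so the results are
+ // written to the buffers already exposed by GetOutputTensorBuffers(), avoiding
+ // the per-inference allocation and memcpy() done in the #else path below.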
+ err = ml_single_invoke_no_alloc(mSingle, mInputDataHandle, mOutputDataHandle);
if (err != ML_ERROR_NONE) {
- LOGE("Failed to request ml_single_invoke(%d).", err);
+ LOGE("Failed to request ml_single_invoke_no_alloc(%d).", err);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
}
+#else
+ ml_tensors_data_h out_data = NULL;
+ void *data_ptr;
+ size_t data_size;
+ unsigned int out_cnt;
- ml_tensors_info_h out_info = NULL;
-
- err = ml_single_get_output_info(mSingle, &out_info);
+ err = ml_tensors_info_get_count(mOutputInfoHandle, &out_cnt);
if (err != ML_ERROR_NONE) {
- LOGE("Failed to request ml_single_get_output_info(%d).", err);
+ LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
}
- unsigned int out_cnt;
-
- err = ml_tensors_info_get_count(out_info, &out_cnt);
+ // Be careful: ml_single_invoke() returns a newly allocated output handle.
+ err = ml_single_invoke(mSingle, mInputDataHandle, &out_data);
if (err != ML_ERROR_NONE) {
- LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
+ LOGE("Failed to request ml_single_invoke(%d).", err);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
}
- // TODO. Why below code is required?
- // ML Single API provides internal tensor buffer for output tensor
- // and user alreadys know the buffer by GetOutputTensorBuffers.
- //
- // However, without below code, user cannot get the output result
- // correctly. What happens in ML Single API framework?
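+ // Copy each output tensor from the newly allocated out_data into the
+ // caller-provided output_buffers, then destroy out_data.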
for (unsigned int i = 0; i < out_cnt; ++i) {
- err = ml_tensors_data_get_tensor_data(
- mOutputDataHandle, i, (void **) &output_buffers[i].buffer,
- &output_buffers[i].size);
+ err = ml_tensors_data_get_tensor_data(out_data, i, &data_ptr, &data_size);
if (err != ML_ERROR_NONE) {
LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", err);
+ ml_tensors_data_destroy(out_data);
return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
}
+ // TODO. Remove memcpy() using ml_single_invoke_fill() later.
+ memcpy(output_buffers[i].buffer, data_ptr, output_buffers[i].size);
LOGI("Output tensor[%u] = %zu", i, output_buffers[i].size);
}
+ ml_tensors_data_destroy(out_data);
+#endif
LOGI("LEAVE");
return INFERENCE_ENGINE_ERROR_NONE;