/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <inference_engine_error.h>
#include "inference_engine_private_type.h"
#include "inference_engine_mlapi_private.h"

#include <unistd.h> // access()
#include <cstring>  // memcpy()
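// This backend wraps NNStreamer's ML Single API (ml_single_*): a model is
// opened as a single-shot handle and invoked synchronously, while tensor
// buffers and tensor info are managed through ml_tensors_* handles.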
// TODO. Below is test code. DO NOT use ML internal function.
#if defined(ENABLE_FAST)
extern "C" int ml_single_invoke_fast(ml_single_h single, const ml_tensors_data_h input, ml_tensors_data_h output);
#endif
namespace InferenceEngineImpl
{
InferenceMLAPI::InferenceMLAPI(void) :
        mPluginType(),
        mTargetDevice(),
        mSingle(),
        mInputInfoHandle(),
        mOutputInfoHandle(),
        mInputDataHandle(),
        mOutputDataHandle(),
        mDesignated_inputs(),
        mDesignated_outputs(),
        mInputProperty(),
        mOutputProperty()
{
}
InferenceMLAPI::~InferenceMLAPI()
{
    mDesignated_inputs.clear();
    std::map<std::string, int>().swap(mDesignated_inputs);

    mDesignated_outputs.clear();
    std::map<std::string, int>().swap(mDesignated_outputs);

    if (mSingle)
        ml_single_close(mSingle);

    if (mInputInfoHandle)
        ml_tensors_info_destroy(mInputInfoHandle);

    if (mOutputInfoHandle)
        ml_tensors_info_destroy(mOutputInfoHandle);

    if (mInputDataHandle)
        ml_tensors_data_destroy(mInputDataHandle);

    if (mOutputDataHandle)
        ml_tensors_data_destroy(mOutputDataHandle);

    mInputInfoHandle = NULL;
    mOutputInfoHandle = NULL;
    mInputDataHandle = NULL;
    mOutputDataHandle = NULL;
}
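// SetPrivateData() receives the backend type chosen by the upper layer.
// MLAPI multiplexes several tensor filters (Vivante, ONE, ARMNN, TFLITE),
// so the concrete type has to be injected before Load() is called.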
int InferenceMLAPI::SetPrivateData(void *data)
{
    inference_backend_type_e type =
            *(static_cast<inference_backend_type_e *>(data));

    if (INFERENCE_BACKEND_NONE >= type || INFERENCE_BACKEND_MAX <= type ||
        INFERENCE_BACKEND_OPENCV == type) {
        LOGE("Invalid backend type.(%d)", type);
        return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
    }

    // Keep the backend type. Load() selects the tensor filter based on it.
    mPluginType = type;
    LOGI("backend type.(%d)", type);

    return INFERENCE_ENGINE_ERROR_NONE;
}
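// SetTargetDevices() accumulates the requested devices as a bitmask;
// Load() later maps the mask onto an ml_nnfw_hw_e value.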
int InferenceMLAPI::SetTargetDevices(int types)
{
    LOGI("Inference targets are:");
    if (types & INFERENCE_TARGET_CPU) {
        mTargetDevice |= INFERENCE_TARGET_CPU;
        LOGI("CPU");
    }

    if (types & INFERENCE_TARGET_GPU) {
        mTargetDevice |= INFERENCE_TARGET_GPU;
        LOGI("GPU");
    }

    if (types & INFERENCE_TARGET_CUSTOM) {
        mTargetDevice |= INFERENCE_TARGET_CUSTOM;
        LOGI("CUSTOM");
    }

    return INFERENCE_ENGINE_ERROR_NONE;
}
int InferenceMLAPI::SetCLTuner(const inference_engine_cltuner *cltuner)
{
    // TODO. Let's wait until the CLTuner feature is ready for the NNFW
    // tensor filter, which is an ONERT runtime backend of MLAPI.

    return INFERENCE_ENGINE_ERROR_NONE;
}
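// A minimal usage sketch (illustration only; the enum values below are
// assumed from inference-engine-interface and are not defined in this file):
//
//   InferenceMLAPI engine;
//   inference_backend_type_e backend = INFERENCE_BACKEND_TFLITE;
//   engine.SetPrivateData(&backend);
//   engine.SetTargetDevices(INFERENCE_TARGET_CPU);
//   std::vector<std::string> paths = { "/path/to/model.tflite" };
//   engine.Load(paths, INFERENCE_MODEL_TFLITE);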
int InferenceMLAPI::Load(std::vector<std::string> model_paths,
                         inference_model_format_e model_format)
{
    std::string model_str(model_paths[0]);

    // TODO. Set NNFW backend type and HW type properly.
    ml_nnfw_type_e nnfw_type;
    // Default HW type; the ARMNN and TFLITE paths keep ML_NNFW_HW_ANY.
    ml_nnfw_hw_e nnfw_hw = ML_NNFW_HW_ANY;

    switch (mPluginType) {
    case INFERENCE_BACKEND_NPU_VIVANTE:
        nnfw_type = ML_NNFW_TYPE_VIVANTE;
        nnfw_hw = ML_NNFW_HW_ANY;
        LOGI("Vivante tensor filter will be used.");

        if (access(model_str.c_str(), R_OK) ||
                access(model_paths[1].c_str(), R_OK)) {
            LOGE("Invalid model file path in [%s,%s]", model_str.c_str(),
                 model_paths[1].c_str());
            return INFERENCE_ENGINE_ERROR_INVALID_PATH;
        }

        // The ML Single API of MLAPI requires a model_paths rule like below:
        // "so library file path,nb model file path" or vice versa.
        model_str += "," + model_paths[1];
        break;
    case INFERENCE_BACKEND_ONE:
    case INFERENCE_BACKEND_ARMNN:
    case INFERENCE_BACKEND_TFLITE:
        if (mPluginType == INFERENCE_BACKEND_ONE) {
            nnfw_type = ML_NNFW_TYPE_NNFW;

            if (mTargetDevice == INFERENCE_TARGET_CPU) {
                nnfw_hw = ML_NNFW_HW_CPU_NEON;
                LOGI("Target device is NEON.");
            } else if (mTargetDevice == INFERENCE_TARGET_GPU) {
                nnfw_hw = ML_NNFW_HW_GPU;
                LOGI("Target device is GPU.");
            } else {
                LOGE("Invalid inference target device type.");
                return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
            }

            LOGI("NNFW tensor filter will be used.");
        }

        if (mPluginType == INFERENCE_BACKEND_ARMNN) {
            nnfw_type = ML_NNFW_TYPE_ARMNN;
            LOGI("ARMNN tensor filter will be used.");
        }

        if (mPluginType == INFERENCE_BACKEND_TFLITE) {
            nnfw_type = ML_NNFW_TYPE_TENSORFLOW_LITE;
            LOGI("TFLITE tensor filter will be used.");
        }

        if (access(model_str.c_str(), R_OK)) {
            LOGE("Invalid model file path in [%s]", model_str.c_str());
            return INFERENCE_ENGINE_ERROR_INVALID_PATH;
        }
        break;
    default:
        LOGE("Invalid plugin type.");
        return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
    }

    LOGI("Model name = %s", model_str.c_str());

    // TODO. Create ml_tensor_info for the input and output tensors and pass
    // them as parameters of the ml_single_open() call.
    int err = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
                             nnfw_type, nnfw_hw);
    if (err != ML_ERROR_NONE) {
        LOGE("Failed to request ml_single_open(%d).", err);
        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
    }

    err = UpdateTensorsInfo();
    if (err != INFERENCE_ENGINE_ERROR_NONE) {
        ml_single_close(mSingle);
        mSingle = NULL;
    }

    return err;
}
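// Both Get*TensorBuffers() functions expose the backend-owned buffers
// (owner_is_backend = 1) instead of allocating new ones, so the caller
// reads and writes the ML Single API tensor memory directly.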
int InferenceMLAPI::GetInputTensorBuffers(
        std::map<std::string, inference_engine_tensor_buffer> &buffers)
{
    // TODO. Implement this function properly for each ML Single API backend.

    // The ML Single API always provides internal tensor buffers, so hand
    // those buffers back to the MediaVision framework so that it does not
    // allocate tensor buffers of its own.

    int ret = INFERENCE_ENGINE_ERROR_NONE;

    // TODO. Below is test code. Should we allocate a new buffer for every inference?
    if (mInputDataHandle == NULL) {
        ret = ml_tensors_data_create(mInputInfoHandle, &mInputDataHandle);
        if (ret != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_data_create(%d).", ret);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }
    }

    // TODO. Cache tensor info and reduce function calls in UpdateTensorsInfo().
    for (auto& input : mDesignated_inputs) {
        inference_engine_tensor_buffer in_buffer;
        ml_tensor_type_e in_type;

        ret = ml_tensors_data_get_tensor_data(mInputDataHandle, input.second, &in_buffer.buffer, &in_buffer.size);
        if (ret != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", ret);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }

        LOGI("buffer = %p, size = %zu", in_buffer.buffer, in_buffer.size);

        ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, input.second, &in_type);
        if (ret != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }

        LOGI("input tensor type = %d", in_type);

        // ConvertTensorType() returns INFERENCE_ENGINE_ERROR_INVALID_PARAMETER
        // for unknown types; report such tensors as not supported.
        int type = ConvertTensorType(in_type);
        if (type == INFERENCE_ENGINE_ERROR_INVALID_PARAMETER)
            return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;

        in_buffer.data_type = static_cast<inference_tensor_data_type_e>(type);
        in_buffer.owner_is_backend = 1;

        buffers.insert(std::make_pair(input.first, in_buffer));
    }

    return INFERENCE_ENGINE_ERROR_NONE;
}
int InferenceMLAPI::GetOutputTensorBuffers(
        std::map<std::string, inference_engine_tensor_buffer> &buffers)
{
    // TODO. Need to check if model file loading is done.

    // The ML Single API always provides internal tensor buffers, so hand
    // those buffers back to the MediaVision framework so that it does not
    // allocate tensor buffers of its own.

    int ret = INFERENCE_ENGINE_ERROR_NONE;

    // TODO. Below is test code. Should we allocate a new buffer for every inference?
    if (mOutputDataHandle == NULL) {
        ret = ml_tensors_data_create(mOutputInfoHandle, &mOutputDataHandle);
        if (ret != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_data_create(%d).", ret);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }
    }

    // TODO. Cache tensor info and reduce function calls in UpdateTensorsInfo().
    for (auto& output : mDesignated_outputs) {
        inference_engine_tensor_buffer out_buffer;
        ml_tensor_type_e out_type;

        ret = ml_tensors_data_get_tensor_data(mOutputDataHandle, output.second, &out_buffer.buffer, &out_buffer.size);
        if (ret != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", ret);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }

        LOGI("buffer = %p, size = %zu", out_buffer.buffer, out_buffer.size);

        ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, output.second, &out_type);
        if (ret != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }

        LOGI("output tensor type = %d", out_type);

        int type = ConvertTensorType(out_type);
        if (type == INFERENCE_ENGINE_ERROR_INVALID_PARAMETER)
            return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;

        out_buffer.data_type = static_cast<inference_tensor_data_type_e>(type);
        out_buffer.owner_is_backend = 1;

        buffers.insert(std::make_pair(output.first, out_buffer));
    }

    return INFERENCE_ENGINE_ERROR_NONE;
}
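// GetInputLayerProperty() translates the MLAPI tensor info (type and
// dimension) of each designated input into an inference_engine_tensor_info;
// the element count is the product of all ML_TENSOR_RANK_LIMIT dimensions.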
int InferenceMLAPI::GetInputLayerProperty(
        inference_engine_layer_property &property)
{
    // TODO. Need to check if model file loading is done.
    int ret = INFERENCE_ENGINE_ERROR_NONE;

    for (auto& input : mDesignated_inputs) {
        inference_engine_tensor_info tensor_info;
        ml_tensor_type_e in_type;
        ml_tensor_dimension in_dim;
        size_t in_size = 1;

        ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, input.second, &in_type);
        if (ret != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
                 ret);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }

        LOGI("input tensor type = %d", in_type);

        int type = ConvertTensorType(in_type);
        if (type == INFERENCE_ENGINE_ERROR_INVALID_PARAMETER)
            return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;

        ret = ml_tensors_info_get_tensor_dimension(mInputInfoHandle, input.second, in_dim);
        if (ret != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
                 ret);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }

        LOGI("Input tensor dimension:");
        for (unsigned int shape_idx = 0; shape_idx < ML_TENSOR_RANK_LIMIT; ++shape_idx) {
            tensor_info.shape.push_back(in_dim[shape_idx]);
            in_size *= static_cast<size_t>(in_dim[shape_idx]);
            LOGI("%u", in_dim[shape_idx]);
        }

        LOGI("input tensor size = %zu", in_size);
        LOGI("input tensor name = %s", input.first.c_str());

        tensor_info.data_type = static_cast<inference_tensor_data_type_e>(type);
        tensor_info.size = in_size;

        property.layers.insert(std::make_pair(input.first, tensor_info));

        // TODO. Compare tensor info from the engine with the one from the given property.
    }

    return INFERENCE_ENGINE_ERROR_NONE;
}
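// GetOutputLayerProperty() does the same for outputs, with one twist: MLAPI
// reports the dimension array in reverse order, so the shape is pushed
// back-to-front, keeping only the dimensions up to the first one of size 1.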
int InferenceMLAPI::GetOutputLayerProperty(
        inference_engine_layer_property &property)
{
    // TODO. Need to check if model file loading is done.
    int ret = INFERENCE_ENGINE_ERROR_NONE;

    for (auto& output : mDesignated_outputs) {
        inference_engine_tensor_info tensor_info;
        ml_tensor_type_e out_type;
        unsigned int out_dim[ML_TENSOR_RANK_LIMIT];
        size_t out_size = 1;

        ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, output.second, &out_type);
        if (ret != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
                 ret);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }

        LOGI("output tensor type = %d", out_type);

        int type = ConvertTensorType(out_type);
        if (type == INFERENCE_ENGINE_ERROR_INVALID_PARAMETER)
            return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;

        ret = ml_tensors_info_get_tensor_dimension(mOutputInfoHandle, output.second, out_dim);
        if (ret != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
                 ret);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }

        int shape_size = 0;

        LOGI("Output tensor dimension:");
        for (unsigned int shape_idx = 0; shape_idx < ML_TENSOR_RANK_LIMIT; ++shape_idx) {
            out_size *= static_cast<size_t>(out_dim[shape_idx]);

            if (out_dim[shape_idx] == 1 && shape_size == 0)
                shape_size = shape_idx;

            LOGI("%u", out_dim[shape_idx]);
        }

        LOGI("Shape size of output tensor: %d", shape_size);
        LOGI("Reversed output tensor dimension:");

        // Reverse the shape order.
        for (int idx = shape_size; idx >= 0; --idx) {
            tensor_info.shape.push_back(out_dim[idx]);
            LOGI("%u", out_dim[idx]);
        }

        LOGI("output tensor size = %zu", out_size);
        LOGI("output tensor name = %s", output.first.c_str());

        tensor_info.data_type = static_cast<inference_tensor_data_type_e>(type);
        tensor_info.size = out_size;

        property.layers.insert(std::make_pair(output.first, tensor_info));

        // TODO. Compare tensor info from the engine with the one from the given property.
    }

    return INFERENCE_ENGINE_ERROR_NONE;
}
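// The Set*LayerProperty() functions currently only record the user-given
// property and reset the designated-layer maps; the authoritative tensor
// info still comes from UpdateTensorsInfo() (see the TODOs below).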
int InferenceMLAPI::SetInputLayerProperty(
        inference_engine_layer_property &property)
{
    for (auto& layer : property.layers) {
        LOGI("input layer name = %s", layer.first.c_str());
    }

    mDesignated_inputs.clear();
    std::map<std::string, int>().swap(mDesignated_inputs);

    // TODO. Request input property information from the given ML Single API
    // backend of NNStreamer and set it instead of the user-given one.
    // Call UpdateTensorsInfo() after requesting the input info.
    mInputProperty = property;

    return INFERENCE_ENGINE_ERROR_NONE;
}
int InferenceMLAPI::SetOutputLayerProperty(
        inference_engine_layer_property &property)
{
    for (auto& layer : property.layers) {
        LOGI("output layer name = %s", layer.first.c_str());
    }

    mDesignated_outputs.clear();
    std::map<std::string, int>().swap(mDesignated_outputs);

    // TODO. Request output property information from the given ML Single API
    // backend of NNStreamer and set it instead of the user-given one.
    // Call UpdateTensorsInfo() after requesting the output info.
    mOutputProperty = property;

    return INFERENCE_ENGINE_ERROR_NONE;
}
int InferenceMLAPI::GetBackendCapacity(inference_engine_capacity *capacity)
{
    if (capacity == NULL) {
        LOGE("Bad pointer.");
        return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
    }

    // TODO. Flag supported accel device types according to a given ML Single API backend.
    if (mPluginType == INFERENCE_BACKEND_NPU_VIVANTE) {
        capacity->supported_accel_devices = INFERENCE_TARGET_CUSTOM;
    } else {
        capacity->supported_accel_devices = INFERENCE_TARGET_GPU |
                                            INFERENCE_TARGET_CPU;
    }

    return INFERENCE_ENGINE_ERROR_NONE;
}
int InferenceMLAPI::CheckTensorBuffers(
        std::map<std::string, inference_engine_tensor_buffer> &input_buffers,
        std::map<std::string, inference_engine_tensor_buffer> &output_buffers)
{
    // TODO. Validate the given buffers against the tensor info of the model.
    return INFERENCE_ENGINE_ERROR_NONE;
}
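// ConvertTensorType() maps ml_tensor_type_e values onto inference engine
// data types. Note that it mixes type values and error codes in one int
// return: on an unknown type it returns an error code that callers must
// check before casting.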
int InferenceMLAPI::ConvertTensorType(int tensor_type)
{
    switch (tensor_type) {
    case ML_TENSOR_TYPE_FLOAT32:
        return INFERENCE_TENSOR_DATA_TYPE_FLOAT32;
    case ML_TENSOR_TYPE_UINT8:
        return INFERENCE_TENSOR_DATA_TYPE_UINT8;
    case ML_TENSOR_TYPE_UINT16:
        return INFERENCE_TENSOR_DATA_TYPE_UINT16;
    case ML_TENSOR_TYPE_INT64:
        return INFERENCE_TENSOR_DATA_TYPE_INT64;
    case ML_TENSOR_TYPE_UINT64:
        return INFERENCE_TENSOR_DATA_TYPE_UINT64;
    default:
        LOGE("Tensor type(%d) is invalid.", tensor_type);
        return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
    }
}
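// UpdateTensorsInfo() re-queries the input/output tensor info from the
// single-shot handle and rebuilds the name-to-index maps used everywhere
// else in this backend. It must run after every successful ml_single_open().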
int InferenceMLAPI::UpdateTensorsInfo()
{
    if (!mSingle) {
        LOGE("Invalid state, single-shot handle is not initialized.");
        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
    }

    if (mInputInfoHandle) {
        ml_tensors_info_destroy(mInputInfoHandle);
        mInputInfoHandle = NULL;
    }

    if (mOutputInfoHandle) {
        ml_tensors_info_destroy(mOutputInfoHandle);
        mOutputInfoHandle = NULL;
    }

    int ret = ml_single_get_input_info(mSingle, &mInputInfoHandle);
    if (ret != ML_ERROR_NONE) {
        LOGE("Failed to request ml_single_get_input_info(%d).", ret);
        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
    }

    unsigned int cnt = 0;
    ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
    if (ret != ML_ERROR_NONE || !cnt) {
        LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
    }

    LOGI("input tensor count = %u", cnt);
    mDesignated_inputs.clear();
    std::map<std::string, int>().swap(mDesignated_inputs);
    for (unsigned int index = 0; index < cnt; ++index) {
        char *in_name = NULL;
        ret = ml_tensors_info_get_tensor_name(mInputInfoHandle, index, &in_name);
        if (ret != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
                 ret);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }

        if (in_name == NULL)
            continue;

        LOGI("index:%u with name %s", index, in_name);

        mDesignated_inputs.insert(std::make_pair(std::string(in_name), index));
        // The name string is duplicated by ml_tensors_info_get_tensor_name(),
        // so release it after use.
        free(in_name);
    }

    ret = ml_single_get_output_info(mSingle, &mOutputInfoHandle);
    if (ret != ML_ERROR_NONE) {
        LOGE("Failed to request ml_single_get_output_info(%d).", ret);
        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
    }

    ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
    if (ret != ML_ERROR_NONE || cnt == 0) {
        LOGE("Failed to request ml_tensors_info_get_count(%d) with cnt %u.", ret, cnt);
        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
    }

    LOGI("output tensor count = %u", cnt);
    mDesignated_outputs.clear();
    std::map<std::string, int>().swap(mDesignated_outputs);
    for (unsigned int index = 0; index < cnt; ++index) {
        char *out_name = NULL;
        ret = ml_tensors_info_get_tensor_name(mOutputInfoHandle, index, &out_name);
        if (ret != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
                 ret);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }

        if (out_name == NULL)
            continue;

        LOGI("index:%u with name %s", index, out_name);

        mDesignated_outputs.insert(std::make_pair(std::string(out_name), index));
        free(out_name);
    }

    return INFERENCE_ENGINE_ERROR_NONE;
}
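// Run() has two paths: with ENABLE_FAST, ml_single_invoke_fast() writes
// straight into mOutputDataHandle; otherwise ml_single_invoke() allocates
// a fresh output handle whose contents must be copied into the caller's
// buffers and then destroyed.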
int InferenceMLAPI::Run(
        std::map<std::string, inference_engine_tensor_buffer> &input_buffers,
        std::map<std::string, inference_engine_tensor_buffer> &output_buffers)
{
    // Make sure that the tensor buffer counts and the binding info match.
    int err = CheckTensorBuffers(input_buffers, output_buffers);
    if (err != INFERENCE_ENGINE_ERROR_NONE) {
        return err;
    }

#if defined(ENABLE_FAST)
    err = ml_single_invoke_fast(mSingle, mInputDataHandle, mOutputDataHandle);
    if (err != ML_ERROR_NONE) {
        LOGE("Failed to request ml_single_invoke_fast(%d).", err);
        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
    }
#else
    ml_tensors_data_h out_data = NULL;
    void *data_ptr = NULL;
    size_t data_size = 0;
    unsigned int out_cnt = 0;

    err = ml_tensors_info_get_count(mOutputInfoHandle, &out_cnt);
    if (err != ML_ERROR_NONE) {
        LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
    }

    // Be careful: ml_single_invoke() returns a newly allocated output handle.
    err = ml_single_invoke(mSingle, mInputDataHandle, &out_data);
    if (err != ML_ERROR_NONE) {
        LOGE("Failed to request ml_single_invoke(%d).", err);
        return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
    }

    // Walk the output buffer map in step with the tensor index.
    auto out_iter = output_buffers.begin();
    for (unsigned int i = 0; i < out_cnt && out_iter != output_buffers.end(); ++i, ++out_iter) {
        err = ml_tensors_data_get_tensor_data(out_data, i, &data_ptr, &data_size);
        if (err != ML_ERROR_NONE) {
            LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", err);
            ml_tensors_data_destroy(out_data);
            return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
        }

        // TODO. Remove memcpy() using ml_single_invoke_fill() later.
        memcpy(out_iter->second.buffer, data_ptr, out_iter->second.size);
        LOGI("Output tensor[%u] = %zu", i, out_iter->second.size);
    }

    ml_tensors_data_destroy(out_data);
#endif

    return INFERENCE_ENGINE_ERROR_NONE;
}
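// Factory entry points. The inference engine common layer creates and
// destroys this backend through these functions.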
class IInferenceEngineCommon *EngineCommonInit(void)
{
    InferenceMLAPI *engine = new InferenceMLAPI();
    return engine;
}
void EngineCommonDestroy(class IInferenceEngineCommon *engine)
{
    delete engine;
}
} /* InferenceEngineImpl */