/**
 * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <inference_engine_error.h>
#include "inference_engine_private_type.h"
#include "inference_engine_mlapi_private.h"

#include <unistd.h>	// access()
#include <string.h>	// memcpy()

// TODO. Below is test code. DO NOT use ML internal functions.
#define ENABLE_NO_ALLOC
#if defined(ENABLE_NO_ALLOC)
extern "C" int ml_single_invoke_no_alloc(ml_single_h single, const ml_tensors_data_h input, ml_tensors_data_h output);
#endif

namespace InferenceEngineImpl
{
namespace MLAPIImpl
{

	InferenceMLAPI::InferenceMLAPI(void) :
			mPluginType(), mTargetDevice(), mSingle(), mInputInfoHandle(),
			mOutputInfoHandle(), mInputDataHandle(), mOutputDataHandle(),
			mDesignated_inputs(), mDesignated_outputs(), mInputProperty(),
			mOutputProperty()
	{}

	InferenceMLAPI::~InferenceMLAPI()
	{
		mDesignated_inputs.clear();
		std::vector<std::string>().swap(mDesignated_inputs);

		mDesignated_outputs.clear();
		std::vector<std::string>().swap(mDesignated_outputs);

		ml_single_close(mSingle);

		if (mInputInfoHandle)
			ml_tensors_info_destroy(mInputInfoHandle);

		if (mOutputInfoHandle)
			ml_tensors_info_destroy(mOutputInfoHandle);

		if (mInputDataHandle)
			ml_tensors_data_destroy(mInputDataHandle);

		if (mOutputDataHandle)
			ml_tensors_data_destroy(mOutputDataHandle);

		mInputInfoHandle = NULL;
		mOutputInfoHandle = NULL;
		mInputDataHandle = NULL;
		mOutputDataHandle = NULL;
	}
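
	// Stores the backend type handed down from the upper layer. Out-of-range
	// types and OpenCV (which is served by its own backend) are rejected.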
	int InferenceMLAPI::SetPrivateData(void *data)
	{
		inference_backend_type_e type =
				*(static_cast<inference_backend_type_e *>(data));

		if (INFERENCE_BACKEND_NONE >= type || INFERENCE_BACKEND_MAX <= type ||
				INFERENCE_BACKEND_OPENCV == type) {
			LOGE("Invalid backend type.");
			return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
		}

		mPluginType = type;

		return INFERENCE_ENGINE_ERROR_NONE;
	}
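
	// Accumulates the requested accelerator flags (CPU, GPU, CUSTOM) into
	// mTargetDevice; Load() consults this mask when picking the HW type
	// passed to ml_single_open().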
	int InferenceMLAPI::SetTargetDevices(int types)
	{
		LOGI("Inference targets are:");

		if (types & INFERENCE_TARGET_CPU) {
			mTargetDevice |= INFERENCE_TARGET_CPU;
			LOGI("CPU");
		}

		if (types & INFERENCE_TARGET_GPU) {
			mTargetDevice |= INFERENCE_TARGET_GPU;
			LOGI("GPU");
		}

		if (types & INFERENCE_TARGET_CUSTOM) {
			mTargetDevice |= INFERENCE_TARGET_CUSTOM;
			LOGI("CUSTOM");
		}

		return INFERENCE_ENGINE_ERROR_NONE;
	}
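
	// Opens a single-shot inference session for the given model files. The
	// NNFW type and HW type handed to ml_single_open() are derived from the
	// plugin type set via SetPrivateData() and the devices set via
	// SetTargetDevices(). A Vivante model consists of an .so and an .nb
	// file, which are joined into one comma-separated path string.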
	int InferenceMLAPI::Load(std::vector<std::string> model_paths,
							 inference_model_format_e model_format)
	{
		std::string model_str(model_paths[0]);

		// TODO. Set NNFW backend type and HW type properly.

		ml_nnfw_type_e nnfw_type;
		// Default to ANY; the INFERENCE_BACKEND_ONE case below overrides this.
		ml_nnfw_hw_e nnfw_hw = ML_NNFW_HW_ANY;

		switch (mPluginType) {
		case INFERENCE_BACKEND_NPU_VIVANTE:
			nnfw_type = ML_NNFW_TYPE_VIVANTE;
			nnfw_hw = ML_NNFW_HW_ANY;
			LOGI("Vivante tensor filter will be used.");

			if (access(model_str.c_str(), R_OK) ||
					access(model_paths[1].c_str(), R_OK)) {
				LOGE("Invalid model file path in [%s,%s].", model_str.c_str(),
					 model_paths[1].c_str());
				return INFERENCE_ENGINE_ERROR_INVALID_PATH;
			}

			// The ML Single API requires the model_paths to be combined
			// like "so library file path,nb model file path" or vice versa.
			model_str += "," + model_paths[1];
			break;
		case INFERENCE_BACKEND_ONE:
		case INFERENCE_BACKEND_ARMNN:
		case INFERENCE_BACKEND_TFLITE:
			if (mPluginType == INFERENCE_BACKEND_ONE) {
				nnfw_type = ML_NNFW_TYPE_NNFW;

				if (mTargetDevice == INFERENCE_TARGET_CPU) {
					nnfw_hw = ML_NNFW_HW_CPU_NEON;
					LOGI("Target device is NEON.");
				} else if (mTargetDevice == INFERENCE_TARGET_GPU) {
					nnfw_hw = ML_NNFW_HW_GPU;
					LOGI("Target device is GPU.");
				} else {
					LOGE("Invalid inference target device type.");
					return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
				}

				LOGI("NNFW tensor filter will be used.");
			}

			if (mPluginType == INFERENCE_BACKEND_ARMNN) {
				nnfw_type = ML_NNFW_TYPE_ARMNN;
				LOGI("ARMNN tensor filter will be used.");
			}

			if (mPluginType == INFERENCE_BACKEND_TFLITE) {
				nnfw_type = ML_NNFW_TYPE_TENSORFLOW_LITE;
				LOGI("TFLITE tensor filter will be used.");
			}

			if (access(model_str.c_str(), R_OK)) {
				LOGE("Invalid model file path in [%s].", model_str.c_str());
				return INFERENCE_ENGINE_ERROR_INVALID_PATH;
			}
			break;
		default:
			LOGE("Invalid plugin type.");
			return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
		}

		LOGI("Model name = %s", model_str.c_str());

		// TODO. Create ml_tensors_info for the input and output tensors and
		// pass them as parameters of ml_single_open().

		int err = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
								 nnfw_type, nnfw_hw);
		if (err != ML_ERROR_NONE) {
			LOGE("Failed to request ml_single_open(%d).", err);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		err = UpdateTensorsInfo();
		if (err != INFERENCE_ENGINE_ERROR_NONE) {
			ml_single_close(mSingle);
			mSingle = NULL;
		}

		return err;
	}
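
	// Exposes the backend-owned input buffers to the caller. The raw
	// pointers come from mInputDataHandle, which is created once and then
	// reused, so owner_is_backend is set and the caller must not free them.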
	int InferenceMLAPI::GetInputTensorBuffers(
			std::vector<inference_engine_tensor_buffer> &buffers)
	{
		// TODO. Implement this function properly according to a given
		// ML Single API backend.

		// The ML Single API always provides internal tensor buffers, so
		// hand those buffers back to the Mediavision framework so that
		// it doesn't allocate tensor buffers on its own.

		unsigned int cnt = 0;

		int ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("input tensor count = %u", cnt);

		// TODO. Below is test code. Should we allocate a new buffer for every inference?
		if (mInputDataHandle == NULL) {
			ret = ml_tensors_data_create(mInputInfoHandle, &mInputDataHandle);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_data_create(%d).", ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}
		}

		// TODO. Cache tensor info and reduce the function calls in UpdateTensorsInfo().
		for (unsigned int i = 0; i < cnt; ++i) {
			inference_engine_tensor_buffer in_buffer;
			ml_tensor_type_e in_type;

			ret = ml_tensors_data_get_tensor_data(mInputDataHandle, i, &in_buffer.buffer, &in_buffer.size);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("buffer = %p, size = %zu", in_buffer.buffer, in_buffer.size);

			ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, i, &in_type);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("input tensor type = %d", in_type);

			int type = ConvertTensorType(in_type);
			if (type == INFERENCE_ENGINE_ERROR_INVALID_PARAMETER)
				return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;

			in_buffer.data_type = static_cast<inference_tensor_data_type_e>(type);
			in_buffer.owner_is_backend = 1;

			buffers.push_back(in_buffer);
		}

		return INFERENCE_ENGINE_ERROR_NONE;
	}
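
	// Same as GetInputTensorBuffers() but for the output side, backed by
	// mOutputDataHandle.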
	int InferenceMLAPI::GetOutputTensorBuffers(
			std::vector<inference_engine_tensor_buffer> &buffers)
	{
		// TODO. Need to check if model file loading is done.

		// The ML Single API always provides internal tensor buffers, so
		// hand those buffers back to the Mediavision framework so that
		// it doesn't allocate tensor buffers on its own.

		unsigned int cnt = 0;

		int ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("output tensor count = %u", cnt);

		// TODO. Below is test code. Should we allocate a new buffer for every inference?
		if (mOutputDataHandle == NULL) {
			ret = ml_tensors_data_create(mOutputInfoHandle, &mOutputDataHandle);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_data_create(%d).", ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}
		}

		// TODO. Cache tensor info and reduce the function calls in UpdateTensorsInfo().
		for (unsigned int i = 0; i < cnt; ++i) {
			inference_engine_tensor_buffer out_buffer;
			ml_tensor_type_e out_type;

			ret = ml_tensors_data_get_tensor_data(mOutputDataHandle, i, &out_buffer.buffer, &out_buffer.size);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("buffer = %p, size = %zu", out_buffer.buffer, out_buffer.size);

			ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, i, &out_type);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("output tensor type = %d", out_type);

			int type = ConvertTensorType(out_type);
			if (type == INFERENCE_ENGINE_ERROR_INVALID_PARAMETER)
				return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;

			out_buffer.data_type = static_cast<inference_tensor_data_type_e>(type);
			out_buffer.owner_is_backend = 1;

			buffers.push_back(out_buffer);
		}

		return INFERENCE_ENGINE_ERROR_NONE;
	}
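
	// Fills the given property with the type, shape, size and name of every
	// input tensor reported by the ML Single API. The layer names are the
	// ones previously designated through SetInputLayerProperty().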
	int InferenceMLAPI::GetInputLayerProperty(
			inference_engine_layer_property &property)
	{
		// TODO. Need to check if model file loading is done.

		unsigned int cnt = 0;

		int ret = ml_tensors_info_get_count(mInputInfoHandle, &cnt);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("input tensor count = %u", cnt);

		for (unsigned int i = 0; i < cnt; ++i) {
			inference_engine_tensor_info tensor_info;
			ml_tensor_type_e in_type;
			ml_tensor_dimension in_dim;
			char *in_name = NULL;
			size_t in_size = 1;

			ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, i, &in_type);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
					 ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("input tensor type = %d", in_type);

			int type = ConvertTensorType(in_type);
			if (type == INFERENCE_ENGINE_ERROR_INVALID_PARAMETER)
				return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;

			ret = ml_tensors_info_get_tensor_dimension(mInputInfoHandle, i, in_dim);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
					 ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("Input tensor dimension:");

			for (unsigned int shape_idx = 0; shape_idx < ML_TENSOR_RANK_LIMIT; ++shape_idx) {
				tensor_info.shape.push_back(in_dim[shape_idx]);
				in_size *= static_cast<size_t>(in_dim[shape_idx]);
				LOGI("%u", in_dim[shape_idx]);
			}

			LOGI("input tensor size = %zu", in_size);

			ret = ml_tensors_info_get_tensor_name(mInputInfoHandle, i, &in_name);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
					 ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("input tensor name = %s", in_name);

			tensor_info.data_type = static_cast<inference_tensor_data_type_e>(type);
			tensor_info.size = in_size;

			property.tensor_infos.push_back(tensor_info);

			// TODO. Compare tensor info from the engine with the given property.
		}

		property.layer_names = mInputProperty.layer_names;

		return INFERENCE_ENGINE_ERROR_NONE;
	}
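
	// Output-side counterpart of GetInputLayerProperty(). The shape reported
	// by the ML Single API is reversed here, up to the first dimension of
	// size 1, presumably to match the layout the upper layer expects.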
	int InferenceMLAPI::GetOutputLayerProperty(
			inference_engine_layer_property &property)
	{
		// TODO. Need to check if model file loading is done.

		unsigned int cnt = 0;

		int ret = ml_tensors_info_get_count(mOutputInfoHandle, &cnt);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("output tensor count = %u", cnt);

		for (unsigned int i = 0; i < cnt; ++i) {
			inference_engine_tensor_info tensor_info;
			ml_tensor_type_e out_type;
			unsigned int out_dim[ML_TENSOR_RANK_LIMIT];
			char *out_name = NULL;
			size_t out_size = 1;
			int shape_size = 0;

			ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, i, &out_type);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
					 ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("output tensor type = %d", out_type);

			int type = ConvertTensorType(out_type);
			if (type == INFERENCE_ENGINE_ERROR_INVALID_PARAMETER)
				return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;

			ret = ml_tensors_info_get_tensor_dimension(mOutputInfoHandle, i, out_dim);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
					 ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("Output tensor dimension:");

			for (unsigned int shape_idx = 0; shape_idx < ML_TENSOR_RANK_LIMIT; ++shape_idx) {
				out_size *= static_cast<size_t>(out_dim[shape_idx]);

				// Remember where the meaningful dimensions end; trailing
				// dimensions are padded with 1 up to ML_TENSOR_RANK_LIMIT.
				if (out_dim[shape_idx] == 1 && shape_size == 0)
					shape_size = shape_idx;

				LOGI("%u", out_dim[shape_idx]);
			}

			LOGI("Shape size of output tensor: %d", shape_size);
			LOGI("Reversed output tensor dimension:");

			// Reverse the shape order.
			for (int idx = shape_size; idx >= 0; --idx) {
				tensor_info.shape.push_back(out_dim[idx]);
				LOGI("%u", out_dim[idx]);
			}

			LOGI("output tensor size = %zu", out_size);

			ret = ml_tensors_info_get_tensor_name(mOutputInfoHandle, i, &out_name);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
					 ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("output tensor name = %s", out_name);

			tensor_info.data_type = static_cast<inference_tensor_data_type_e>(type);
			tensor_info.size = out_size;

			property.tensor_infos.push_back(tensor_info);

			// TODO. Compare tensor info from the engine with the given property.
		}

		property.layer_names = mOutputProperty.layer_names;

		return INFERENCE_ENGINE_ERROR_NONE;
	}
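
	// Caches the user-designated input layer names and property; see the
	// TODO below about validating them against the backend instead.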
	int InferenceMLAPI::SetInputLayerProperty(
			inference_engine_layer_property &property)
	{
		std::vector<std::string>::iterator iter;
		for (iter = property.layer_names.begin();
			 iter != property.layer_names.end(); iter++) {
			std::string name = *iter;
			LOGI("input layer name = %s", name.c_str());
		}

		mDesignated_inputs.clear();
		std::vector<std::string>().swap(mDesignated_inputs);

		// TODO. Request the input property information from a given ML Single
		// API (nnstreamer) backend and use it instead of the user-given one.
		// Call UpdateTensorsInfo() after requesting the input info.
		mDesignated_inputs = property.layer_names;
		mInputProperty = property;

		return INFERENCE_ENGINE_ERROR_NONE;
	}
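
	// Caches the user-designated output layer names and property, mirroring
	// SetInputLayerProperty().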
	int InferenceMLAPI::SetOutputLayerProperty(
			inference_engine_layer_property &property)
	{
		std::vector<std::string>::iterator iter;
		for (iter = property.layer_names.begin();
			 iter != property.layer_names.end(); iter++) {
			std::string name = *iter;
			LOGI("output layer name = %s", name.c_str());
		}

		mDesignated_outputs.clear();
		std::vector<std::string>().swap(mDesignated_outputs);

		// TODO. Request the output property information from a given ML Single
		// API (nnstreamer) backend and use it instead of the user-given one.
		// Call UpdateTensorsInfo() after requesting the output info.
		mDesignated_outputs = property.layer_names;
		mOutputProperty = property;

		return INFERENCE_ENGINE_ERROR_NONE;
	}
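
	// Reports which accelerator devices this backend can serve: the Vivante
	// NPU is exposed as the CUSTOM target, everything else as CPU or GPU.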
	int InferenceMLAPI::GetBackendCapacity(inference_engine_capacity *capacity)
	{
		if (capacity == NULL) {
			LOGE("Bad pointer.");
			return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
		}

		// TODO. Flag the supported accel device types according to a given
		// ML Single API backend.
		if (mPluginType == INFERENCE_BACKEND_NPU_VIVANTE) {
			capacity->supported_accel_devices = INFERENCE_TARGET_CUSTOM;
		} else {
			capacity->supported_accel_devices = INFERENCE_TARGET_GPU |
												INFERENCE_TARGET_CPU;
		}

		return INFERENCE_ENGINE_ERROR_NONE;
	}
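
	// Placeholder for validating that the given tensor buffers match the
	// model's binding information; it currently always succeeds.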
	int InferenceMLAPI::CheckTensorBuffers(
			std::vector<inference_engine_tensor_buffer> &input_buffers,
			std::vector<inference_engine_tensor_buffer> &output_buffers)
	{
		return INFERENCE_ENGINE_ERROR_NONE;
	}
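
	// Maps an ml_tensor_type_e value to the matching
	// inference_tensor_data_type_e one, or returns
	// INFERENCE_ENGINE_ERROR_INVALID_PARAMETER for an unsupported type.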
	int InferenceMLAPI::ConvertTensorType(int tensor_type)
	{
		switch (tensor_type) {
		case ML_TENSOR_TYPE_FLOAT32:
			return INFERENCE_TENSOR_DATA_TYPE_FLOAT32;
		case ML_TENSOR_TYPE_UINT8:
			return INFERENCE_TENSOR_DATA_TYPE_UINT8;
		case ML_TENSOR_TYPE_UINT16:
			return INFERENCE_TENSOR_DATA_TYPE_UINT16;
		case ML_TENSOR_TYPE_INT64:
			return INFERENCE_TENSOR_DATA_TYPE_INT64;
		case ML_TENSOR_TYPE_UINT64:
			return INFERENCE_TENSOR_DATA_TYPE_UINT64;
		default:
			LOGE("Tensor type(%d) is invalid.", tensor_type);
			return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
		}
	}
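
	// Refreshes mInputInfoHandle and mOutputInfoHandle from the opened
	// single-shot handle, destroying any stale info handles first. Called
	// right after ml_single_open() succeeds in Load().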
	int InferenceMLAPI::UpdateTensorsInfo()
	{
		if (!mSingle) {
			LOGE("Invalid state, single-shot handle is not initialized.");
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		if (mInputInfoHandle) {
			ml_tensors_info_destroy(mInputInfoHandle);
			mInputInfoHandle = NULL;
		}

		if (mOutputInfoHandle) {
			ml_tensors_info_destroy(mOutputInfoHandle);
			mOutputInfoHandle = NULL;
		}

		int ret = ml_single_get_input_info(mSingle, &mInputInfoHandle);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_single_get_input_info(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		ret = ml_single_get_output_info(mSingle, &mOutputInfoHandle);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_single_get_output_info(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		return INFERENCE_ENGINE_ERROR_NONE;
	}
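
	// Runs one inference. With ENABLE_NO_ALLOC the results land directly in
	// mOutputDataHandle, whose raw pointers were already handed out by
	// GetOutputTensorBuffers(); otherwise ml_single_invoke() allocates a new
	// output handle and every tensor is memcpy'd into the caller's buffers.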
	int InferenceMLAPI::Run(
			std::vector<inference_engine_tensor_buffer> &input_buffers,
			std::vector<inference_engine_tensor_buffer> &output_buffers)
	{
		// Make sure the tensor buffer count matches the binding info count.
		int err = CheckTensorBuffers(input_buffers, output_buffers);
		if (err != INFERENCE_ENGINE_ERROR_NONE) {
			return err;
		}

#if defined(ENABLE_NO_ALLOC)
		err = ml_single_invoke_no_alloc(mSingle, mInputDataHandle, mOutputDataHandle);
		if (err != ML_ERROR_NONE) {
			LOGE("Failed to request ml_single_invoke_no_alloc(%d).", err);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}
#else
		ml_tensors_data_h out_data = NULL;
		void *data_ptr = NULL;
		size_t data_size = 0;
		unsigned int out_cnt = 0;

		err = ml_tensors_info_get_count(mOutputInfoHandle, &out_cnt);
		if (err != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		// Be careful: ml_single_invoke() returns a newly allocated output handle.
		err = ml_single_invoke(mSingle, mInputDataHandle, &out_data);
		if (err != ML_ERROR_NONE) {
			LOGE("Failed to request ml_single_invoke(%d).", err);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		for (unsigned int i = 0; i < out_cnt; ++i) {
			err = ml_tensors_data_get_tensor_data(out_data, i, &data_ptr, &data_size);
			if (err != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", err);
				ml_tensors_data_destroy(out_data);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			// TODO. Remove the memcpy() by using ml_single_invoke_fill() later.
			memcpy(output_buffers[i].buffer, data_ptr, output_buffers[i].size);
			LOGI("Output tensor[%u] = %zu", i, output_buffers[i].size);
		}

		ml_tensors_data_destroy(out_data);
#endif

		return INFERENCE_ENGINE_ERROR_NONE;
	}
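
	// Plugin entry points. The inference engine common layer loads this
	// backend at runtime and resolves these C symbols to create and destroy
	// an engine instance.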
	extern "C"
	{
		class IInferenceEngineCommon *EngineCommonInit(void)
		{
			InferenceMLAPI *engine = new InferenceMLAPI();

			return engine;
		}

		void EngineCommonDestroy(class IInferenceEngineCommon *engine)
		{
			delete engine;
		}
	}
} /* MLAPIImpl */
} /* InferenceEngineImpl */