/**
 * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <unistd.h>
#include <cerrno>
#include <cstring>
#include <algorithm>
#include <map>
#include <string>
#include <tuple>
#include <stdexcept>
#include <vector>

#include <inference_engine_error.h>
#include "inference_engine_private_type.h"
#include "inference_engine_mlapi_private.h"

// TODO. Below is test code. DO NOT use ML internal function.
#if defined(ENABLE_FAST)
extern "C" int ml_single_invoke_fast(ml_single_h single, const ml_tensors_data_h input, ml_tensors_data_h output);
#endif

namespace InferenceEngineImpl
{

InferenceMLAPI::InferenceMLAPI(void) :
		mPluginType(),
		mTargetDevice(),
		mSingle(),
		mInputInfoHandle(),
		mOutputInfoHandle(),
		mInputDataHandle(),
		mOutputDataHandle(),
		mDesignated_inputs(),
		mDesignated_outputs(),
		mInputProperty(),
		mOutputProperty()
{
}

InferenceMLAPI::~InferenceMLAPI()
{
	mDesignated_inputs.clear();
	std::map<std::string, int>().swap(mDesignated_inputs);

	mDesignated_outputs.clear();
	std::map<std::string, int>().swap(mDesignated_outputs);

	ml_single_close(mSingle);

	if (mInputInfoHandle)
		ml_tensors_info_destroy(mInputInfoHandle);
	if (mOutputInfoHandle)
		ml_tensors_info_destroy(mOutputInfoHandle);
	if (mInputDataHandle)
		ml_tensors_data_destroy(mInputDataHandle);
	if (mOutputDataHandle)
		ml_tensors_data_destroy(mOutputDataHandle);

	mInputInfoHandle = NULL;
	mOutputInfoHandle = NULL;
	mInputDataHandle = NULL;
	mOutputDataHandle = NULL;
}

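// SetPrivateData() receives the backend type, handed over as an opaque
// pointer from the engine-common layer, and rejects any type this plugin
// cannot serve: out-of-range values and the OpenCV backend.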
int InferenceMLAPI::SetPrivateData(void *data)
{
	inference_backend_type_e type =
			*(static_cast<inference_backend_type_e *>(data));

	if (INFERENCE_BACKEND_NONE >= type || INFERENCE_BACKEND_MAX <= type ||
		INFERENCE_BACKEND_OPENCV == type) {
		LOGE("Invalid backend type.(%d)", type);
		return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
	}

	mPluginType = type;
	LOGI("backend type.(%d)", type);

	return INFERENCE_ENGINE_ERROR_NONE;
}

int InferenceMLAPI::SetTargetDevices(int types)
{
	LOGI("Inference targets are:");

	if (types & INFERENCE_TARGET_CPU) {
		mTargetDevice |= INFERENCE_TARGET_CPU;
		LOGI("CPU");
	}

	if (types & INFERENCE_TARGET_GPU) {
		mTargetDevice |= INFERENCE_TARGET_GPU;
		LOGI("GPU");
	}

	if (types & INFERENCE_TARGET_CUSTOM) {
		mTargetDevice |= INFERENCE_TARGET_CUSTOM;
		LOGI("CUSTOM");
	}

	return INFERENCE_ENGINE_ERROR_NONE;
}

int InferenceMLAPI::SetCLTuner(const inference_engine_cltuner *cltuner)
{
	// TODO. Let's wait until the CLTuner feature is ready for the NNFW tensor
	// filter, which is an ONERT runtime backend of MLAPI.

	return INFERENCE_ENGINE_ERROR_NONE;
}

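// A note on SetTensorInfo(): it walks the given layer property and mirrors
// each layer into an ml_tensors_info_h through the ML Single API setters,
// i.e. tensor count, per-tensor type and per-tensor dimension. Only the
// nntrainer backend needs this hand-built info; see Load() below.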
int InferenceMLAPI::SetTensorInfo(ml_tensors_info_h& tensor_info,
								  inference_engine_layer_property& layer_property)
{
	int err = ml_tensors_info_set_count(tensor_info, layer_property.layers.size());
	if (err != ML_ERROR_NONE) {
		LOGE("Failed to set tensor count(%d).", err);
		return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
	}

	size_t layer_idx = 0;

	for (auto& iter : layer_property.layers) {
		inference_engine_tensor_info& info = iter.second;
		int tensor_type = 0;

		try {
			tensor_type = ConvertTensorTypeToMLAPI(info.data_type);
		} catch (const std::invalid_argument& ex) {
			LOGE("Error (%s) (%d)", ex.what(), info.data_type);
			return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
		}

		err = ml_tensors_info_set_tensor_type(tensor_info, layer_idx, static_cast<ml_tensor_type_e>(tensor_type));
		if (err != ML_ERROR_NONE) {
			LOGE("Failed to set tensor type(%d).", err);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		// TODO. nnstreamer needs a fixed dimension count of 4 for the nntrainer tensor filter. Why?
		std::vector<unsigned int> indim(4, 1);

		LOGI("Input tensor(%zu) shape:", layer_idx);

		std::copy(info.shape.begin(), info.shape.end(), indim.begin());

		for (auto& shape_value : indim)
			LOGI("%u", shape_value);

		err = ml_tensors_info_set_tensor_dimension(tensor_info, layer_idx, indim.data());
		if (err != ML_ERROR_NONE) {
			LOGE("Failed to set tensor dimension(%d).", err);
			return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
		}

		++layer_idx;
	}

	return INFERENCE_ENGINE_ERROR_NONE;
}

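// CreateMLAPITensorInfo() is a thin wrapper: it creates the handle,
// delegates to SetTensorInfo() and tears the handle down again on failure,
// so callers never see a half-initialized ml_tensors_info_h.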
int InferenceMLAPI::CreateMLAPITensorInfo(ml_tensors_info_h& tensor_info,
										  inference_engine_layer_property& layer_property)
{
	if (layer_property.layers.empty()) {
		LOGE("input or output property is empty.");
		return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
	}

	int err = ml_tensors_info_create(&tensor_info);
	if (err != ML_ERROR_NONE) {
		LOGE("Failed to create tensor info(%d).", err);
		return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
	}

	err = SetTensorInfo(tensor_info, layer_property);
	if (err != INFERENCE_ENGINE_ERROR_NONE)
		ml_tensors_info_destroy(tensor_info);

	return err;
}

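// GetNNFWInfo() maps the plugin backend type onto the (ml_nnfw_type_e,
// ml_nnfw_hw_e) pair that ml_single_open_full() expects. Only the ONE
// backend distinguishes target hardware (NEON vs. GPU); the others let
// the tensor filter decide with ML_NNFW_HW_ANY.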
std::tuple<ml_nnfw_type_e, ml_nnfw_hw_e> InferenceMLAPI::GetNNFWInfo()
{
	switch (mPluginType) {
	case INFERENCE_BACKEND_NPU_VIVANTE:
		LOGI("Vivante tensor filter will be used.");
		return std::make_tuple(ML_NNFW_TYPE_VIVANTE, ML_NNFW_HW_ANY);

	case INFERENCE_BACKEND_ONE:
		LOGI("NNFW tensor filter will be used.");

		if (mTargetDevice == INFERENCE_TARGET_CPU) {
			LOGI("Target device is NEON.");
			return std::make_tuple(ML_NNFW_TYPE_NNFW, ML_NNFW_HW_CPU_NEON);
		} else if (mTargetDevice == INFERENCE_TARGET_GPU) {
			LOGI("Target device is GPU.");
			return std::make_tuple(ML_NNFW_TYPE_NNFW, ML_NNFW_HW_GPU);
		}

		LOGE("Invalid inference target device type.");
		throw std::invalid_argument("invalid target device type.");

	case INFERENCE_BACKEND_ARMNN:
		LOGI("ARMNN tensor filter will be used.");
		return std::make_tuple(ML_NNFW_TYPE_ARMNN, ML_NNFW_HW_ANY);

	case INFERENCE_BACKEND_TFLITE:
		LOGI("TFLITE tensor filter will be used.");
		return std::make_tuple(ML_NNFW_TYPE_TENSORFLOW_LITE, ML_NNFW_HW_ANY);

	case INFERENCE_BACKEND_SNPE:
		LOGI("SNPE tensor filter will be used.");
		return std::make_tuple(ML_NNFW_TYPE_SNPE, ML_NNFW_HW_ANY);

	case INFERENCE_BACKEND_NNTRAINER:
		LOGI("NNTRAINER tensor filter will be used.");
		return std::make_tuple(ML_NNFW_TYPE_NNTR_INF, ML_NNFW_HW_ANY);

	default:
		LOGE("Invalid plugin type.");
		throw std::invalid_argument("invalid plugin type.");
	}
}

bool InferenceMLAPI::IsFileReadable(const std::string& path)
{
	if (access(path.c_str(), R_OK) == -1) {
		LOGE("file [%s] is not readable, errno(%d)", path.c_str(), errno);
		return false;
	}

	return true;
}

std::string InferenceMLAPI::GetModelPath(const std::vector<std::string>& model_paths)
{
	switch (mPluginType) {
	case INFERENCE_BACKEND_NPU_VIVANTE:
		if (!IsFileReadable(model_paths[0]) ||
			!IsFileReadable(model_paths[1]))
			throw std::runtime_error("invalid path");

		// ML Single API of MLAPI requires a model_paths rule like below,
		// "so library file path,nb model file path" or vice versa.
		return model_paths[0] + "," + model_paths[1];

	case INFERENCE_BACKEND_ONE:
	case INFERENCE_BACKEND_ARMNN:
	case INFERENCE_BACKEND_TFLITE:
	case INFERENCE_BACKEND_SNPE:
	case INFERENCE_BACKEND_NNTRAINER:
		if (!IsFileReadable(model_paths[0]))
			throw std::runtime_error("invalid path");
		return model_paths[0];

	default:
		throw std::runtime_error("should not reach here");
	}
}

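// GetCustomProp() feeds the custom-property argument of
// ml_single_open_full(). Only SNPE consumes it, selecting its runtime
// ("RUNTIME:CPU", "RUNTIME:GPU" or "RUNTIME:DSP") from the target device;
// every other backend gets an empty string.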
const char *InferenceMLAPI::GetCustomProp()
{
	if (mPluginType != INFERENCE_BACKEND_SNPE)
		return "";

	return mTargetDevice == INFERENCE_TARGET_CPU ? "RUNTIME:CPU" :
		   mTargetDevice == INFERENCE_TARGET_GPU ? "RUNTIME:GPU" : "RUNTIME:DSP";
}

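// Load() is the main entry point: it resolves the tensor filter and target
// hardware, builds the model path string, opens a single-shot handle via
// ml_single_open_full() and finally caches the input/output tensor info
// handles that every later call relies on.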
int InferenceMLAPI::Load(std::vector<std::string> model_paths,
						 inference_model_format_e model_format)
{
	std::string model_str;

	ml_nnfw_type_e nnfw_type = ML_NNFW_TYPE_ANY;
	ml_nnfw_hw_e nnfw_hw = ML_NNFW_HW_ANY;

	try {
		std::tie(nnfw_type, nnfw_hw) = GetNNFWInfo();
		model_str = GetModelPath(model_paths);
	} catch (const std::invalid_argument& ex) {
		LOGE("Get NNFW info Error (%s)", ex.what());
		return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
	} catch (const std::runtime_error& ex) {
		LOGE("Get model path Error (%s)", ex.what());
		return INFERENCE_ENGINE_ERROR_INVALID_PATH;
	}

	LOGI("Model name = %s", model_str.c_str());

	ml_tensors_info_h in_info = NULL, out_info = NULL;

	// In case of the nntrainer tensor filter, input and output tensor
	// information is needed to load a given model.
	if (mPluginType == INFERENCE_BACKEND_NNTRAINER) {
		int ret = CreateMLAPITensorInfo(in_info, mInputProperty);
		if (ret != INFERENCE_ENGINE_ERROR_NONE)
			return ret;

		ret = CreateMLAPITensorInfo(out_info, mOutputProperty);
		if (ret != INFERENCE_ENGINE_ERROR_NONE) {
			ml_tensors_info_destroy(in_info);
			return ret;
		}
	}

	int err = ml_single_open_full(&mSingle, model_str.c_str(), in_info, out_info,
								  nnfw_type, nnfw_hw, GetCustomProp());
	if (err != ML_ERROR_NONE) {
		LOGE("Failed to request ml_single_open_full(%d).", err);
		ml_tensors_info_destroy(in_info);
		ml_tensors_info_destroy(out_info);
		return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
	}

	ml_tensors_info_destroy(in_info);
	ml_tensors_info_destroy(out_info);

	if (mInputInfoHandle) {
		ml_tensors_info_destroy(mInputInfoHandle);
		mInputInfoHandle = NULL;
	}

	err = ml_single_get_input_info(mSingle, &mInputInfoHandle);
	if (err != ML_ERROR_NONE) {
		LOGE("Failed to request ml_single_get_input_info(%d).", err);
		return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
	}

	if (mOutputInfoHandle) {
		ml_tensors_info_destroy(mOutputInfoHandle);
		mOutputInfoHandle = NULL;
	}

	err = ml_single_get_output_info(mSingle, &mOutputInfoHandle);
	if (err != ML_ERROR_NONE) {
		LOGE("Failed to request ml_single_get_output_info(%d).", err);
		ml_tensors_info_destroy(mOutputInfoHandle);
		mOutputInfoHandle = NULL;
		return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
	}

	err = UpdateTensorsInfo();
	if (err != INFERENCE_ENGINE_ERROR_NONE) {
		ml_single_close(mSingle);
		mSingle = NULL;
	}

	return err;
}

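// A note on buffer ownership for the two getters below: they expose the
// tensor memory that ml_tensors_data_create() allocated inside this
// backend, so owner_is_backend is set and the caller (the Mediavision
// framework) must not free or reallocate these buffers itself.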
int InferenceMLAPI::GetInputTensorBuffers(
		std::map<std::string, inference_engine_tensor_buffer> &buffers)
{
	// TODO. Implement this function according to a given ML Single API backend properly.

	// ML Single API will always provide internal tensor buffers, so
	// hand those buffers back to the Mediavision framework so that
	// it doesn't allocate tensor buffers internally.

	int ret = INFERENCE_ENGINE_ERROR_NONE;

	// TODO. Below is test code; should we allocate a new buffer for every inference?
	if (mInputDataHandle == NULL) {
		ret = ml_tensors_data_create(mInputInfoHandle, &mInputDataHandle);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_data_create(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}
	}

	// TODO. Cache tensor info and reduce function calls in UpdateTensorsInfo().
	for (auto& input : mDesignated_inputs) {
		inference_engine_tensor_buffer in_buffer;
		ml_tensor_type_e in_type;

		ret = ml_tensors_data_get_tensor_data(mInputDataHandle, input.second, &in_buffer.buffer, &in_buffer.size);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", ret);
			ml_tensors_data_destroy(mInputDataHandle);
			mInputDataHandle = NULL;
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("buffer = %p, size = %zu", in_buffer.buffer, in_buffer.size);

		ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, input.second, &in_type);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
			ml_tensors_data_destroy(mInputDataHandle);
			mInputDataHandle = NULL;
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("input tensor type = %d", in_type);

		int type = 0;

		try {
			type = ConvertTensorTypeToInternal(in_type);
		} catch (const std::invalid_argument& ex) {
			LOGE("Error (%s) (%d)", ex.what(), in_type);
			ml_tensors_data_destroy(mInputDataHandle);
			mInputDataHandle = NULL;
			return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
		}

		in_buffer.data_type = static_cast<inference_tensor_data_type_e>(type);
		in_buffer.owner_is_backend = 1;

		buffers.insert(std::make_pair(input.first, in_buffer));
	}

	return INFERENCE_ENGINE_ERROR_NONE;
}

int InferenceMLAPI::GetOutputTensorBuffers(
		std::map<std::string, inference_engine_tensor_buffer> &buffers)
{
	// TODO. Need to check if model file loading is done.

	// ML Single API will always provide internal tensor buffers, so
	// hand those buffers back to the Mediavision framework so that
	// it doesn't allocate tensor buffers internally.

	int ret = INFERENCE_ENGINE_ERROR_NONE;

	// TODO. Below is test code; should we allocate a new buffer for every inference?
	if (mOutputDataHandle == NULL) {
		ret = ml_tensors_data_create(mOutputInfoHandle, &mOutputDataHandle);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_data_create(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}
	}

	// TODO. Cache tensor info and reduce function calls in UpdateTensorsInfo().
	for (auto& output : mDesignated_outputs) {
		inference_engine_tensor_buffer out_buffer;
		ml_tensor_type_e out_type;

		ret = ml_tensors_data_get_tensor_data(mOutputDataHandle, output.second, &out_buffer.buffer, &out_buffer.size);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", ret);
			ml_tensors_data_destroy(mOutputDataHandle);
			mOutputDataHandle = NULL;
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("buffer = %p, size = %zu", out_buffer.buffer, out_buffer.size);

		ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, output.second, &out_type);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).", ret);
			ml_tensors_data_destroy(mOutputDataHandle);
			mOutputDataHandle = NULL;
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("output tensor type = %d", out_type);

		int type = 0;

		try {
			type = ConvertTensorTypeToInternal(out_type);
		} catch (const std::invalid_argument& ex) {
			LOGE("Error (%s) (%d)", ex.what(), out_type);
			ml_tensors_data_destroy(mOutputDataHandle);
			mOutputDataHandle = NULL;
			return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
		}

		out_buffer.data_type = static_cast<inference_tensor_data_type_e>(type);
		out_buffer.owner_is_backend = 1;

		buffers.insert(std::make_pair(output.first, out_buffer));
	}

	return INFERENCE_ENGINE_ERROR_NONE;
}

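// The two property getters below read the tensor type and dimension of
// every designated layer back from the cached info handles and repackage
// them as inference_engine_tensor_info for the upper layer.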
int InferenceMLAPI::GetInputLayerProperty(
		inference_engine_layer_property &property)
{
	// TODO. Need to check if model file loading is done.
	int ret = INFERENCE_ENGINE_ERROR_NONE;

	for (auto& input : mDesignated_inputs) {
		inference_engine_tensor_info tensor_info;
		ml_tensor_type_e in_type;
		ml_tensor_dimension in_dim;
		size_t in_size = 1;

		ret = ml_tensors_info_get_tensor_type(mInputInfoHandle, input.second, &in_type);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
				 ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("input tensor type = %d", in_type);

		int type = 0;

		try {
			type = ConvertTensorTypeToInternal(in_type);
		} catch (const std::invalid_argument& ex) {
			LOGE("Error (%s) (%d)", ex.what(), in_type);
			return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
		}

		ret = ml_tensors_info_get_tensor_dimension(mInputInfoHandle, input.second, in_dim);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
				 ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("Input tensor dimension:");

		for (unsigned int shape_idx = 0; shape_idx < ML_TENSOR_RANK_LIMIT; ++shape_idx) {
			tensor_info.shape.push_back(in_dim[shape_idx]);
			in_size *= static_cast<size_t>(in_dim[shape_idx]);
			LOGI("%u", in_dim[shape_idx]);
		}

		LOGI("input tensor size = %zu", in_size);
		LOGI("input tensor name = %s", input.first.c_str());

		tensor_info.data_type = static_cast<inference_tensor_data_type_e>(type);
		tensor_info.size = in_size;
		tensor_info.shape_type = INFERENCE_TENSOR_SHAPE_NCHW;

		property.layers.insert(std::make_pair(input.first, tensor_info));

		// TODO. Compare tensor info from the engine to the one from a given property.
	}

	return INFERENCE_ENGINE_ERROR_NONE;
}

int InferenceMLAPI::GetOutputLayerProperty(
		inference_engine_layer_property &property)
{
	// TODO. Need to check if model file loading is done.
	int ret = INFERENCE_ENGINE_ERROR_NONE;

	for (auto& output : mDesignated_outputs) {
		inference_engine_tensor_info tensor_info;
		ml_tensor_type_e out_type;
		unsigned int out_dim[ML_TENSOR_RANK_LIMIT];
		size_t out_size = 1;
		int shape_size = 0;

		ret = ml_tensors_info_get_tensor_type(mOutputInfoHandle, output.second, &out_type);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
				 ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("output tensor type = %d", out_type);

		int type = 0;

		try {
			type = ConvertTensorTypeToInternal(out_type);
		} catch (const std::invalid_argument& ex) {
			LOGE("Error (%s) (%d)", ex.what(), out_type);
			return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
		}

		ret = ml_tensors_info_get_tensor_dimension(mOutputInfoHandle, output.second, out_dim);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
				 ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("Output tensor dimension:");

		for (unsigned int shape_idx = 0; shape_idx < ML_TENSOR_RANK_LIMIT; ++shape_idx) {
			out_size *= static_cast<size_t>(out_dim[shape_idx]);

			// Remember the first index whose dimension is 1; everything
			// beyond it is padding of the fixed-rank dimension array.
			if (out_dim[shape_idx] == 1 && shape_size == 0)
				shape_size = shape_idx;

			LOGI("%u", out_dim[shape_idx]);
		}

		LOGI("Shape size of output tensor : %d", shape_size);
		LOGI("Reversed output tensor dimension:");

		// Reverse the shape order.
		for (int idx = shape_size; idx >= 0; --idx) {
			tensor_info.shape.push_back(out_dim[idx]);
			LOGI("%u", out_dim[idx]);
		}

		LOGI("output tensor size = %zu", out_size);
		LOGI("output tensor name = %s", output.first.c_str());

		tensor_info.data_type = static_cast<inference_tensor_data_type_e>(type);
		tensor_info.size = out_size;
		tensor_info.shape_type = INFERENCE_TENSOR_SHAPE_NCHW;

		property.layers.insert(std::make_pair(output.first, tensor_info));

		// TODO. Compare tensor info from the engine to the one from a given property.
	}

	return INFERENCE_ENGINE_ERROR_NONE;
}

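// The two setters below only record the user-given properties; the
// designated name-to-index maps are rebuilt later by UpdateTensorsInfo(),
// which is called at the end of Load().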
int InferenceMLAPI::SetInputLayerProperty(
		inference_engine_layer_property &property)
{
	for (auto& layer : property.layers) {
		LOGI("input layer name = %s", layer.first.c_str());
	}

	mDesignated_inputs.clear();
	std::map<std::string, int>().swap(mDesignated_inputs);

	// TODO. Request input property information from a given ML Single API
	// of the nnstreamer backend and set it instead of the user-given one.
	// Call UpdateTensorsInfo() after requesting the input info.

	mInputProperty = property;

	return INFERENCE_ENGINE_ERROR_NONE;
}

int InferenceMLAPI::SetOutputLayerProperty(
		inference_engine_layer_property &property)
{
	for (auto& layer : property.layers) {
		LOGI("output layer name = %s", layer.first.c_str());
	}

	mDesignated_outputs.clear();
	std::map<std::string, int>().swap(mDesignated_outputs);

	// TODO. Request output property information from a given ML Single API
	// of the nnstreamer backend and set it instead of the user-given one.
	// Call UpdateTensorsInfo() after requesting the output info.

	mOutputProperty = property;

	return INFERENCE_ENGINE_ERROR_NONE;
}

int InferenceMLAPI::GetBackendCapacity(inference_engine_capacity *capacity)
{
	if (capacity == NULL) {
		LOGE("Bad pointer.");
		return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
	}

	switch (mPluginType) {
	case INFERENCE_BACKEND_NPU_VIVANTE:
		capacity->supported_accel_devices = INFERENCE_TARGET_CUSTOM;
		break;
	case INFERENCE_BACKEND_SNPE:
		capacity->supported_accel_devices = INFERENCE_TARGET_GPU |
											INFERENCE_TARGET_CPU |
											INFERENCE_TARGET_CUSTOM;
		break;
	default:
		capacity->supported_accel_devices = INFERENCE_TARGET_GPU |
											INFERENCE_TARGET_CPU;
		break;
	}

	return INFERENCE_ENGINE_ERROR_NONE;
}

int InferenceMLAPI::CheckTensorBuffers(
		std::map<std::string, inference_engine_tensor_buffer> &input_buffers,
		std::map<std::string, inference_engine_tensor_buffer> &output_buffers)
{
	return INFERENCE_ENGINE_ERROR_NONE;
}

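// The two converters below translate between ml_tensor_type_e and the
// internal inference_tensor_data_type_e. Any type outside the handled set
// is reported via std::invalid_argument, which callers catch and map to
// INFERENCE_ENGINE_ERROR_INVALID_PARAMETER.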
int InferenceMLAPI::ConvertTensorTypeToInternal(int tensor_type)
{
	int converted_type = 0;

	switch (tensor_type) {
	case ML_TENSOR_TYPE_FLOAT32:
		converted_type = INFERENCE_TENSOR_DATA_TYPE_FLOAT32;
		break;
	case ML_TENSOR_TYPE_UINT8:
		converted_type = INFERENCE_TENSOR_DATA_TYPE_UINT8;
		break;
	case ML_TENSOR_TYPE_UINT16:
		converted_type = INFERENCE_TENSOR_DATA_TYPE_UINT16;
		break;
	case ML_TENSOR_TYPE_INT64:
		converted_type = INFERENCE_TENSOR_DATA_TYPE_INT64;
		break;
	case ML_TENSOR_TYPE_UINT64:
		converted_type = INFERENCE_TENSOR_DATA_TYPE_UINT64;
		break;
	default:
		throw std::invalid_argument("invalid tensor type.");
	}

	return converted_type;
}

int InferenceMLAPI::ConvertTensorTypeToMLAPI(int tensor_type)
{
	int converted_type = 0;

	switch (tensor_type) {
	case INFERENCE_TENSOR_DATA_TYPE_FLOAT32:
		converted_type = ML_TENSOR_TYPE_FLOAT32;
		break;
	case INFERENCE_TENSOR_DATA_TYPE_UINT8:
		converted_type = ML_TENSOR_TYPE_UINT8;
		break;
	case INFERENCE_TENSOR_DATA_TYPE_UINT16:
		converted_type = ML_TENSOR_TYPE_UINT16;
		break;
	case INFERENCE_TENSOR_DATA_TYPE_INT64:
		converted_type = ML_TENSOR_TYPE_INT64;
		break;
	case INFERENCE_TENSOR_DATA_TYPE_UINT64:
		converted_type = ML_TENSOR_TYPE_UINT64;
		break;
	default:
		throw std::invalid_argument("invalid tensor type.");
	}

	return converted_type;
}

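// UpdateTensorsInfo() (re)builds the name-to-index maps mDesignated_inputs
// and mDesignated_outputs. User-given layer properties win when present;
// otherwise the names are queried from the tensor filter through the
// cached ml_tensors_info_h handles.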
int InferenceMLAPI::UpdateTensorsInfo()
{
	if (!mSingle) {
		LOGE("Invalid state, single-shot handle is not initialized.");
		return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
	}

	unsigned int input_tensor_cnt = 0;

	// If user-given input layer information exists then use it.
	if (!mInputProperty.layers.empty()) {
		for (auto& iter : mInputProperty.layers) {
			LOGI("index:%u with name %s", input_tensor_cnt, iter.first.c_str());
			mDesignated_inputs.insert(std::make_pair(iter.first, input_tensor_cnt));
			input_tensor_cnt++;
		}
	} else {
		// Otherwise, request input layer information from the tensor filter.
		int ret = ml_tensors_info_get_count(mInputInfoHandle, &input_tensor_cnt);
		if (ret != ML_ERROR_NONE || !input_tensor_cnt) {
			LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		for (unsigned int index = 0; index < input_tensor_cnt; ++index) {
			char *in_name = NULL;

			ret = ml_tensors_info_get_tensor_name(mInputInfoHandle, index, &in_name);
			LOGI("index:%u with name %s", index, in_name);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).", ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			if (in_name == NULL)
				continue;

			mDesignated_inputs.insert(std::make_pair(std::string(in_name), index));
		}
	}

	LOGI("input tensor count = %u", input_tensor_cnt);

	unsigned int output_tensor_cnt = 0;

	// If user-given output layer information exists then use it.
	if (!mOutputProperty.layers.empty()) {
		unsigned int index = 0;

		for (auto& iter : mOutputProperty.layers) {
			LOGI("index:%u with name %s", index, iter.first.c_str());
			mDesignated_outputs.insert(std::make_pair(iter.first, index));
			index++;
		}

		output_tensor_cnt = index;
	} else {
		// Otherwise, request output layer information from the tensor filter.
		int ret = ml_tensors_info_get_count(mOutputInfoHandle, &output_tensor_cnt);
		if (ret != ML_ERROR_NONE || output_tensor_cnt == 0) {
			LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		for (unsigned int index = 0; index < output_tensor_cnt; ++index) {
			char *out_name = NULL;

			ret = ml_tensors_info_get_tensor_name(mOutputInfoHandle, index, &out_name);
			LOGI("index:%u with name %s", index, out_name);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
					 ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			if (out_name == NULL)
				continue;

			mDesignated_outputs.insert(std::make_pair(std::string(out_name), index));
		}
	}

	LOGI("output tensor count = %u", output_tensor_cnt);

	return INFERENCE_ENGINE_ERROR_NONE;
}

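// Run() is the hot path. With ENABLE_FAST it invokes the internal
// zero-copy entry point against the cached input/output data handles;
// otherwise it calls ml_single_invoke(), which allocates a fresh output
// data handle that must be copied out and destroyed on every inference.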
int InferenceMLAPI::Run(
		std::map<std::string, inference_engine_tensor_buffer> &input_buffers,
		std::map<std::string, inference_engine_tensor_buffer> &output_buffers)
{
	// Make sure the tensor buffer count and the binding info count match.
	int err = CheckTensorBuffers(input_buffers, output_buffers);
	if (err != INFERENCE_ENGINE_ERROR_NONE) {
		return err;
	}

#if defined(ENABLE_FAST)
	err = ml_single_invoke_fast(mSingle, mInputDataHandle, mOutputDataHandle);
	if (err != ML_ERROR_NONE) {
		LOGE("Failed to request ml_single_invoke_fast(%d).", err);
		return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
	}
#else
	ml_tensors_data_h out_data = NULL;
	void *data_ptr;
	size_t data_size;
	unsigned int out_cnt;

	err = ml_tensors_info_get_count(mOutputInfoHandle, &out_cnt);
	if (err != ML_ERROR_NONE) {
		LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
		return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
	}

	// Be careful, ml_single_invoke() returns a newly allocated output handle.
	err = ml_single_invoke(mSingle, mInputDataHandle, &out_data);
	if (err != ML_ERROR_NONE) {
		LOGE("Failed to request ml_single_invoke(%d).", err);
		return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
	}

	// output_buffers is keyed by layer name, so walk it alongside the
	// tensor index instead of indexing the map directly.
	auto output = output_buffers.begin();

	for (unsigned int i = 0; i < out_cnt && output != output_buffers.end(); ++i, ++output) {
		err = ml_tensors_data_get_tensor_data(out_data, i, &data_ptr, &data_size);
		if (err != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", err);
			ml_tensors_data_destroy(out_data);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		// TODO. Remove memcpy() using ml_single_invoke_fill() later.
		memcpy(output->second.buffer, data_ptr, output->second.size);
		LOGI("Output tensor[%u] = %zu", i, output->second.size);
	}

	ml_tensors_data_destroy(out_data);
#endif

	return INFERENCE_ENGINE_ERROR_NONE;
}

extern "C"
{
	class IInferenceEngineCommon *EngineCommonInit(void)
	{
		InferenceMLAPI *engine = new InferenceMLAPI();

		return engine;
	}

	void EngineCommonDestroy(class IInferenceEngineCommon *engine)
	{
		delete engine;
	}
}
} /* InferenceEngineImpl */