Set supported_device_types according to MLAPI backend type
[platform/core/multimedia/inference-engine-mlapi.git] / src / inference_engine_mlapi.cpp
/**
 * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <inference_engine_error.h>
#include "inference_engine_mlapi_private.h"

#include <fstream>
#include <iostream>
#include <unistd.h>
#include <time.h>
#include <queue>

namespace InferenceEngineImpl
{
namespace MLAPIImpl
{
	InferenceMLAPI::InferenceMLAPI(void) :
			mPluginType(),
			mTargetDevice(),
			mSingle(),
			mDesignated_inputs(),
			mDesignated_outputs(),
			mInputProperty(),
			mOutputProperty(),
			mInputTensorBuffer(),
			mOutputTensorBuffer(),
			mInputTensorInfo(),
			mOutputTensorInfo()
	{
		LOGI("ENTER");

		LOGI("LEAVE");
	}

	InferenceMLAPI::~InferenceMLAPI()
	{
		mDesignated_inputs.clear();
		std::vector<std::string>().swap(mDesignated_inputs);

		mDesignated_outputs.clear();
		std::vector<std::string>().swap(mDesignated_outputs);
	}

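	// SetPrivateData() receives the backend type chosen by the upper framework.
	// A minimal usage sketch (hypothetical caller code, not part of this file):
	//
	//   inference_backend_type_e type = INFERENCE_BACKEND_NNFW;
	//   engine->SetPrivateData(static_cast<void *>(&type));
	//
	// Only INFERENCE_BACKEND_NNFW and INFERENCE_BACKEND_MLAPI are accepted below.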
	int InferenceMLAPI::SetPrivateData(void *data)
	{
		LOGI("ENTER");

		inference_backend_type_e type =
				*(static_cast<inference_backend_type_e *>(data));

		if (INFERENCE_BACKEND_NNFW != type && INFERENCE_BACKEND_MLAPI != type) {
			LOGE("Invalid backend type.");
			return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
		}

		mPluginType = type;

		LOGI("LEAVE");

		return INFERENCE_ENGINE_ERROR_NONE;
	}

	int InferenceMLAPI::SetTargetDevices(int types)
	{
		LOGI("ENTER");

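		// `types` is a bitmask of INFERENCE_TARGET_* flags, so several targets can
		// be requested at once, e.g. INFERENCE_TARGET_CPU | INFERENCE_TARGET_GPU;
		// each matching bit is accumulated into mTargetDevice below.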
		LOGI("Inference targets are:");
		if (types & INFERENCE_TARGET_CPU) {
			mTargetDevice |= INFERENCE_TARGET_CPU;
			LOGI("CPU");
		}

		if (types & INFERENCE_TARGET_GPU) {
			mTargetDevice |= INFERENCE_TARGET_GPU;
			LOGI("GPU");
		}

		if (types & INFERENCE_TARGET_CUSTOM) {
			mTargetDevice |= INFERENCE_TARGET_CUSTOM;
			LOGI("NPU");
		}

		LOGI("LEAVE");

		return INFERENCE_ENGINE_ERROR_NONE;
	}

	int InferenceMLAPI::Load(std::vector<std::string> model_paths,
							 inference_model_format_e model_format)
	{
		LOGI("ENTER");

		// The ML Single API requires model_paths to follow the rule below:
		// "so library file path,nb model file path" or vice versa.
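		// For illustration only (hypothetical paths, not from the original source):
		//   model_paths[0] = "/usr/share/model/net.so"
		//   model_paths[1] = "/usr/share/model/net.nb"
		// would be joined into "/usr/share/model/net.so,/usr/share/model/net.nb".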
		std::string model_str(model_paths[0] + "," + model_paths[1]);

		LOGI("Model name = %s", model_str.c_str());

		// TODO. Set NNFW backend type and HW type properly.

		ml_nnfw_type_e nnfw_type;
		ml_nnfw_hw_e nnfw_hw;

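		// Backend/target combinations handled by the switch below:
		//   INFERENCE_BACKEND_MLAPI + INFERENCE_TARGET_CUSTOM -> ML_NNFW_TYPE_VIVANTE, ML_NNFW_HW_ANY
		//   INFERENCE_BACKEND_NNFW  + INFERENCE_TARGET_CPU    -> ML_NNFW_TYPE_NNFW, ML_NNFW_HW_CPU_NEON
		//   INFERENCE_BACKEND_NNFW  + INFERENCE_TARGET_GPU    -> ML_NNFW_TYPE_NNFW, ML_NNFW_HW_GPU
		// Any other combination is rejected.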
		switch (mPluginType) {
		case INFERENCE_BACKEND_MLAPI:
			// For now, if the backend type is MLAPI and the target device type is
			// CUSTOM, then the Vivante NPU is used.
			// TODO. Other NPUs (e.g., SRNPU) should be considered later.
			if ((mTargetDevice & INFERENCE_TARGET_CUSTOM) ==
				INFERENCE_TARGET_CUSTOM) {
				nnfw_type = ML_NNFW_TYPE_VIVANTE;
				nnfw_hw = ML_NNFW_HW_ANY;
				LOGI("Vivante tensor filter will be used.");
			} else {
				LOGE("Invalid target device type.");
				return INFERENCE_ENGINE_ERROR_NOT_SUPPORTED;
			}
			break;
		case INFERENCE_BACKEND_NNFW:
			nnfw_type = ML_NNFW_TYPE_NNFW;
			if (mTargetDevice == INFERENCE_TARGET_CPU) {
				nnfw_hw = ML_NNFW_HW_CPU_NEON;
				LOGI("Target device is NEON.");
			} else if (mTargetDevice == INFERENCE_TARGET_GPU) {
				nnfw_hw = ML_NNFW_HW_GPU;
				LOGI("Target device is GPU.");
			} else {
				LOGE("Invalid inference target device type.");
				return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
			}
			LOGI("NNFW tensor filter will be used.");
			break;
		// TODO. Handle other backend types.
		default:
			LOGE("Invalid plugin type.");
			return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
		}

		int ret = ml_single_open(&mSingle, model_str.c_str(), NULL, NULL,
								 nnfw_type, nnfw_hw);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_single_open(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("LEAVE");

		return INFERENCE_ENGINE_ERROR_NONE;
	}

	int InferenceMLAPI::GetInputTensorBuffers(
			std::vector<inference_engine_tensor_buffer> &buffers)
	{
		LOGI("ENTER");

		// TODO. Implement this function properly according to the given ML Single API backend.

		LOGI("LEAVE");

		return INFERENCE_ENGINE_ERROR_NONE;
	}

	int InferenceMLAPI::GetOutputTensorBuffers(
			std::vector<inference_engine_tensor_buffer> &buffers)
	{
		LOGI("ENTER");

		// Output tensor buffers will be allocated by the backend plugin of the
		// ML Single API of nnstreamer, so add a placeholder tensor buffer object here.
		// This buffer will be updated in the Run callback.

		// Caution: this tensor buffer is checked by the upper framework to verify
		// whether the tensor buffer object is valid, so fill it with dummy data.

		// TODO. Consider multiple output tensors.

		inference_engine_tensor_buffer tensor_buf = { 0, };
		tensor_buf.data_type = INFERENCE_TENSOR_DATA_TYPE_FLOAT16;
		tensor_buf.buffer = (void *) 1;
		tensor_buf.size = 1;
		tensor_buf.owner_is_backend = 1;
		buffers.push_back(tensor_buf);

		LOGI("LEAVE");

		return INFERENCE_ENGINE_ERROR_NONE;
	}

	int InferenceMLAPI::GetInputLayerProperty(
			inference_engine_layer_property &property)
	{
		LOGI("ENTER");

		ml_tensors_info_h in_info = NULL;

		// TODO. Need to check if model file loading is done.

		int ret = ml_single_get_input_info(mSingle, &in_info);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_single_get_input_info(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		unsigned int cnt;
		ret = ml_tensors_info_get_count(in_info, &cnt);
		if (ret != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_count(%d).", ret);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("input tensor count = %u", cnt);

		for (unsigned int i = 0; i < cnt; ++i) {
			ml_tensor_type_e in_type;
			ml_tensor_dimension in_dim;
			char *in_name = NULL;
			size_t in_size;

			ret = ml_tensors_info_get_tensor_type(in_info, i, &in_type);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_type(%d).",
					 ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("input tensor type = %d", in_type);

			// ml_tensors_info_get_tensor_dimension() fills an ml_tensor_dimension array.
			ret = ml_tensors_info_get_tensor_dimension(in_info, i, in_dim);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_dimension(%d).",
					 ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("input tensor dimension[0] = %u", in_dim[0]);

			ret = ml_tensors_info_get_tensor_name(in_info, i, &in_name);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_name(%d).",
					 ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("input tensor name = %s", in_name);

			ret = ml_tensors_info_get_tensor_size(in_info, i, &in_size);
			if (ret != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_info_get_tensor_size(%d).",
					 ret);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}

			LOGI("input tensor size = %zu", in_size);

			// TODO. Compare the tensor info from the engine with the one in the given property.
		}

		property.layer_names = mInputProperty.layer_names;

		std::vector<inference_engine_tensor_info>::iterator iter;
		for (iter = mInputProperty.tensor_infos.begin();
			 iter != mInputProperty.tensor_infos.end(); iter++) {
			inference_engine_tensor_info tensor_info = *iter;
			property.tensor_infos.push_back(tensor_info);
		}

		LOGI("LEAVE");

		return INFERENCE_ENGINE_ERROR_NONE;
	}

	int InferenceMLAPI::GetOutputLayerProperty(
			inference_engine_layer_property &property)
	{
		LOGI("ENTER");

		property.layer_names = mOutputProperty.layer_names;

		inference_engine_tensor_info tensor_info;

		// TODO. Set the tensor info from the given ML Single API backend of nnstreamer instead of a fixed one.
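		// NOTE: the hard-coded FLOAT16 tensor of shape { 1, 1001 } below presumably
		// targets a 1001-class image classification output; this is an assumption,
		// see the TODO above.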

		tensor_info.data_type = INFERENCE_TENSOR_DATA_TYPE_FLOAT16;
		tensor_info.shape = { 1, 1001 };
		tensor_info.size = 1001;
		property.tensor_infos.push_back(tensor_info);

		LOGI("LEAVE");

		return INFERENCE_ENGINE_ERROR_NONE;
	}

	int InferenceMLAPI::SetInputLayerProperty(
			inference_engine_layer_property &property)
	{
		LOGI("ENTER");

		std::vector<std::string>::iterator iter;
		for (iter = property.layer_names.begin();
			 iter != property.layer_names.end(); iter++) {
			std::string name = *iter;
			LOGI("input layer name = %s", name.c_str());
		}

		mDesignated_inputs.clear();
		std::vector<std::string>().swap(mDesignated_inputs);

		// TODO. Request the input property information from the given ML Single API
		// backend of nnstreamer, and set it instead of the user-given one.

		mDesignated_inputs = property.layer_names;
		mInputProperty = property;

		LOGI("LEAVE");

		return INFERENCE_ENGINE_ERROR_NONE;
	}

	int InferenceMLAPI::SetOutputLayerProperty(
			inference_engine_layer_property &property)
	{
		LOGI("ENTER");

		std::vector<std::string>::iterator iter;
		for (iter = property.layer_names.begin();
			 iter != property.layer_names.end(); iter++) {
			std::string name = *iter;
			LOGI("output layer name = %s", name.c_str());
		}

		mDesignated_outputs.clear();
		std::vector<std::string>().swap(mDesignated_outputs);

		// TODO. Request the output property information from the given ML Single API
		// backend of nnstreamer, and set it instead of the user-given one.

		mDesignated_outputs = property.layer_names;
		mOutputProperty = property;

		LOGI("LEAVE");

		return INFERENCE_ENGINE_ERROR_NONE;
	}

	int InferenceMLAPI::GetBackendCapacity(inference_engine_capacity *capacity)
	{
		LOGI("ENTER");

		if (capacity == NULL) {
			LOGE("Bad pointer.");
			return INFERENCE_ENGINE_ERROR_INVALID_PARAMETER;
		}

		// TODO. Flag the supported accelerator device types according to the given ML Single API backend of nnstreamer.
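		// supported_accel_devices reflects the backend type set via SetPrivateData():
		//   INFERENCE_BACKEND_MLAPI -> INFERENCE_TARGET_CUSTOM (NPU) only
		//   INFERENCE_BACKEND_NNFW  -> INFERENCE_TARGET_CPU and INFERENCE_TARGET_GPU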
		if (mPluginType == INFERENCE_BACKEND_MLAPI) {
			capacity->supported_accel_devices = INFERENCE_TARGET_CUSTOM;
		} else {
			capacity->supported_accel_devices = INFERENCE_TARGET_GPU |
												INFERENCE_TARGET_CPU;
		}

		LOGI("LEAVE");

		return INFERENCE_ENGINE_ERROR_NONE;
	}

	int InferenceMLAPI::CheckTensorBuffers(
			std::vector<inference_engine_tensor_buffer> &input_buffers,
			std::vector<inference_engine_tensor_buffer> &output_buffers)
	{
		LOGI("ENTER");

		LOGI("LEAVE");

		return INFERENCE_ENGINE_ERROR_NONE;
	}

	int InferenceMLAPI::Run(
			std::vector<inference_engine_tensor_buffer> &input_buffers,
			std::vector<inference_engine_tensor_buffer> &output_buffers)
	{
		LOGI("ENTER");

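		// Inference flow with the ML Single API, as implemented below:
		//   ml_single_get_input_info()        -> query the input tensors layout
		//   ml_tensors_data_create()          -> allocate an input data handle
		//   ml_tensors_data_set_tensor_data() -> copy each input buffer into the handle
		//   ml_single_invoke()                -> run inference synchronously
		//   ml_tensors_data_get_tensor_data() -> expose the (single) output buffer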
		// Make sure the tensor buffer count matches the binding info count.
		int err = CheckTensorBuffers(input_buffers, output_buffers);
		if (err != INFERENCE_ENGINE_ERROR_NONE) {
			return err;
		}

		ml_tensors_info_h in_info = NULL;

		err = ml_single_get_input_info(mSingle, &in_info);
		if (err != ML_ERROR_NONE) {
			LOGE("Failed to request ml_single_get_input_info(%d).", err);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		ml_tensors_data_h input_data = NULL;
		err = ml_tensors_data_create(in_info, &input_data);
		if (err != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_data_create(%d).", err);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		unsigned int cnt;
		err = ml_tensors_info_get_count(in_info, &cnt);
		if (err != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_info_get_count(%d).", err);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		for (unsigned int i = 0; i < cnt; ++i) {
			LOGI("index(%u) : buffer = %p, size = %zu\n", i,
				 input_buffers[i].buffer, input_buffers[i].size);
			err = ml_tensors_data_set_tensor_data(input_data, i,
												  input_buffers[i].buffer,
												  input_buffers[i].size);
			if (err != ML_ERROR_NONE) {
				LOGE("Failed to request ml_tensors_data_set_tensor_data(%d).",
					 err);
				return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
			}
		}

		ml_tensors_data_h output_data = NULL;
		err = ml_single_invoke(mSingle, input_data, &output_data);
		if (err != ML_ERROR_NONE) {
			LOGE("Failed to request ml_single_invoke(%d).", err);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		// TODO. Consider multiple output tensors.

		err = ml_tensors_data_get_tensor_data(
				output_data, 0, (void **) &output_buffers[0].buffer,
				&output_buffers[0].size);
		if (err != ML_ERROR_NONE) {
			LOGE("Failed to request ml_tensors_data_get_tensor_data(%d).", err);
			return INFERENCE_ENGINE_ERROR_INVALID_OPERATION;
		}

		LOGI("Output tensor size = %zu", output_buffers[0].size);

		LOGI("LEAVE");

		return INFERENCE_ENGINE_ERROR_NONE;
	}

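	// Factory entry points. The inference-engine-common layer presumably loads this
	// backend as a shared library and resolves EngineCommonInit()/EngineCommonDestroy()
	// by name; this is an assumption about the caller, not something defined in this file.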
	extern "C"
	{
		class IInferenceEngineCommon *EngineCommonInit(void)
		{
			LOGI("ENTER");

			InferenceMLAPI *engine = new InferenceMLAPI();

			LOGI("LEAVE");

			return engine;
		}

		void EngineCommonDestroy(class IInferenceEngineCommon *engine)
		{
			LOGI("ENTER");

			delete engine;

			LOGI("LEAVE");
		}
	}
} /* MLAPIImpl */
} /* InferenceEngineImpl */