8216b30c1db9b02867995990ed339a84b8482dd3
[platform/core/api/mediavision.git] mv_machine_learning/object_detection/src/object_detection.cpp
/**
 * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string.h>
#include <fstream>
#include <map>
#include <memory>

#include "machine_learning_exception.h"
#include "mv_machine_learning_common.h"
#include "mv_object_detection_config.h"
#include "object_detection.h"

using namespace std;
using namespace mediavision::inference;
using namespace MediaVision::Common;
using namespace mediavision::common;
using namespace mediavision::machine_learning::exception;

namespace mediavision
{
namespace machine_learning
{
ObjectDetection::ObjectDetection(ObjectDetectionTaskType task_type, shared_ptr<MachineLearningConfig> config)
		: _task_type(task_type), _config(config)
{
	_inference = make_unique<Inference>();
}

void ObjectDetection::preDestroy()
{
	if (!_async_manager)
		return;

	_async_manager->stop();
}

ObjectDetectionTaskType ObjectDetection::getTaskType()
{
	return _task_type;
}

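// Populate _valid_backends with the names of the inference engines that can serve this task.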
void ObjectDetection::getEngineList()
{
	for (auto idx = MV_INFERENCE_BACKEND_NONE + 1; idx < MV_INFERENCE_BACKEND_MAX; ++idx) {
		auto backend = _inference->getSupportedInferenceBackend(idx);
		// TODO. Describe which inference engines each Task API supports and, based on that,
		//       check whether a given engine type is supported by this Task API.
		//       As of now, only tflite is supported.
		if (backend.second && backend.first == "tflite")
			_valid_backends.push_back(backend.first);
	}
}

void ObjectDetection::getDeviceList(const char *engine_type)
{
	// TODO. Add the device types available for a given engine type later.
	//       By default, cpu and gpu only.
	_valid_devices.push_back("cpu");
	_valid_devices.push_back("gpu");
}

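// Map the given engine and device names (case-insensitively) to backend and device
// type values and store them in the configuration.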
void ObjectDetection::setEngineInfo(std::string engine_type_name, std::string device_type_name)
{
	if (engine_type_name.empty() || device_type_name.empty())
		throw InvalidParameter("Invalid engine info.");

	transform(engine_type_name.begin(), engine_type_name.end(), engine_type_name.begin(), ::toupper);
	transform(device_type_name.begin(), device_type_name.end(), device_type_name.begin(), ::toupper);

	int engine_type = GetBackendType(engine_type_name);
	int device_type = GetDeviceType(device_type_name);

	if (engine_type == MEDIA_VISION_ERROR_INVALID_PARAMETER || device_type == MEDIA_VISION_ERROR_INVALID_PARAMETER)
		throw InvalidParameter("backend or target device type not found.");

	_config->setBackendType(engine_type);
	_config->setTargetDeviceType(device_type);

	LOGI("Engine type : %s => %d, Device type : %s => %d", engine_type_name.c_str(), engine_type,
		 device_type_name.c_str(), device_type);
}

void ObjectDetection::getNumberOfEngines(unsigned int *number_of_engines)
{
	if (!_valid_backends.empty()) {
		*number_of_engines = _valid_backends.size();
		return;
	}

	getEngineList();
	*number_of_engines = _valid_backends.size();
}

void ObjectDetection::getEngineType(unsigned int engine_index, char **engine_type)
{
	if (!_valid_backends.empty()) {
		if (_valid_backends.size() <= engine_index)
			throw InvalidParameter("Invalid engine index.");

		*engine_type = const_cast<char *>(_valid_backends[engine_index].data());
		return;
	}

	getEngineList();

	if (_valid_backends.size() <= engine_index)
		throw InvalidParameter("Invalid engine index.");

	*engine_type = const_cast<char *>(_valid_backends[engine_index].data());
}

void ObjectDetection::getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices)
{
	if (!_valid_devices.empty()) {
		*number_of_devices = _valid_devices.size();
		return;
	}

	getDeviceList(engine_type);
	*number_of_devices = _valid_devices.size();
}

void ObjectDetection::getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type)
{
	if (!_valid_devices.empty()) {
		if (_valid_devices.size() <= device_index)
			throw InvalidParameter("Invalid device index.");

		*device_type = const_cast<char *>(_valid_devices[device_index].data());
		return;
	}

	getDeviceList(engine_type);

	if (_valid_devices.size() <= device_index)
		throw InvalidParameter("Invalid device index.");

	*device_type = const_cast<char *>(_valid_devices[device_index].data());
}

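// Load the label file configured in the meta data into _labels, one label per line.
// Nothing is loaded when no label file path is set.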
void ObjectDetection::loadLabel()
{
	if (_config->getLabelFilePath().empty())
		return;

	ifstream readFile;

	_labels.clear();
	readFile.open(_config->getLabelFilePath().c_str());

	if (readFile.fail())
		throw InvalidOperation("Fail to open " + _config->getLabelFilePath() + " file.");

	string line;

	while (getline(readFile, line))
		_labels.push_back(line);

	readFile.close();
}

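// Parse the meta file for this task type, load the label file if one is configured,
// and bind the configured backend engine and target device.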
void ObjectDetection::configure()
{
	_config->loadMetaFile(make_unique<ObjectDetectionParser>(static_cast<int>(_task_type)));
	loadLabel();

	int ret = _inference->bind(_config->getBackendType(), _config->getTargetDeviceType());
	if (ret != MEDIA_VISION_ERROR_NONE)
		throw InvalidOperation("Fail to bind a backend engine.");
}

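// Hand the input and output tensor information from the meta file over to the
// inference engine, set the model file and request the engine to load it.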
void ObjectDetection::prepare()
{
	int ret = _inference->configureInputMetaInfo(_config->getInputMetaMap());
	if (ret != MEDIA_VISION_ERROR_NONE)
		throw InvalidOperation("Fail to configure input tensor info from meta file.");

	ret = _inference->configureOutputMetaInfo(_config->getOutputMetaMap());
	if (ret != MEDIA_VISION_ERROR_NONE)
		throw InvalidOperation("Fail to configure output tensor info from meta file.");

	_inference->configureModelFiles("", _config->getModelFilePath(), "");

	// Request to load model files to a backend engine.
	ret = _inference->load();
	if (ret != MEDIA_VISION_ERROR_NONE)
		throw InvalidOperation("Fail to load model files.");
}

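// Return the meta information registered for the model's single input tensor.
// Models with more than one input tensor are not supported yet.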
shared_ptr<MetaInfo> ObjectDetection::getInputMetaInfo()
{
	TensorBuffer &tensor_buffer = _inference->getInputTensorBuffer();
	IETensorBuffer &tensor_info_map = tensor_buffer.getIETensorBuffer();

	// TODO. consider using multiple tensors later.
	if (tensor_info_map.size() != 1)
		throw InvalidOperation("Invalid input tensor count.");

	auto tensor_buffer_iter = tensor_info_map.begin();

	// Get the meta information corresponding to a given input tensor name.
	return _config->getInputMetaMap()[tensor_buffer_iter->first];
}

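// Build a PreprocessConfig from the input tensor meta information (color space, data
// type, channel count, width, height) plus the optional normalization and quantization
// parameters, then run preprocessing on the given mv_source_h to fill inputVector.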
template<typename T>
void ObjectDetection::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<T> &inputVector)
{
	LOGI("ENTER");

	PreprocessConfig config = { false,
								metaInfo->colorSpace,
								metaInfo->dataType,
								metaInfo->getChannel(),
								metaInfo->getWidth(),
								metaInfo->getHeight() };

	auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
	if (normalization) {
		config.normalize = normalization->use;
		config.mean = normalization->mean;
		config.std = normalization->std;
	}

	auto quantization =
			static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
	if (quantization) {
		config.quantize = quantization->use;
		config.scale = quantization->scale;
		config.zeropoint = quantization->zeropoint;
	}

	_preprocess.setConfig(config);
	_preprocess.run<T>(mv_src, inputVector);

	LOGI("LEAVE");
}

template<typename T> void ObjectDetection::inference(vector<vector<T> > &inputVectors)
{
	LOGI("ENTER");

	int ret = _inference->run<T>(inputVectors);
	if (ret != MEDIA_VISION_ERROR_NONE)
		throw InvalidOperation("Fail to run inference.");

	LOGI("LEAVE");
}

template<typename T> void ObjectDetection::perform(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo)
{
	vector<T> inputVector;

	preprocess<T>(mv_src, metaInfo, inputVector);

	vector<vector<T> > inputVectors = { inputVector };

	inference<T>(inputVectors);

	// TODO. Update operation status here.
}

void ObjectDetection::perform(mv_source_h &mv_src)
{
	shared_ptr<MetaInfo> metaInfo = getInputMetaInfo();
	if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
		perform<unsigned char>(mv_src, metaInfo);
	else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
		perform<float>(mv_src, metaInfo);
	else
		throw InvalidOperation("Invalid model data type.");
}

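// Asynchronous inference path. On the first call this creates an AsyncManager whose
// worker repeatedly pops a preprocessed entry from the input queue, runs inference on
// it, stamps the result from result() with the entry's frame number and pushes it to
// the output queue. Every call preprocesses the given frame and pushes it to the
// input queue.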
template<typename T> void ObjectDetection::performAsync(ObjectDetectionInput &input, shared_ptr<MetaInfo> metaInfo)
{
	if (!_async_manager) {
		_async_manager = make_unique<AsyncManager<ObjectDetectionResult> >([this]() {
			AsyncInputQueue<T> inputQueue = _async_manager->popFromInput<T>();

			inference<T>(inputQueue.inputs);

			ObjectDetectionResult &resultQueue = result();

			resultQueue.frame_number = inputQueue.frame_number;
			_async_manager->pushToOutput(resultQueue);
		});
	}

	vector<T> inputVector;

	preprocess<T>(input.inference_src, metaInfo, inputVector);

	vector<vector<T> > inputVectors = { inputVector };

	_async_manager->push(inputVectors);
}

void ObjectDetection::performAsync(ObjectDetectionInput &input)
{
	shared_ptr<MetaInfo> metaInfo = getInputMetaInfo();

	if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8) {
		performAsync<unsigned char>(input, metaInfo);
	} else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32) {
		performAsync<float>(input, metaInfo);
		// TODO
	} else {
		throw InvalidOperation("Invalid model data type.");
	}
}

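// Return a detection result. In asynchronous mode this pops the next result produced
// by the AsyncManager worker and fails if the manager is no longer working; in
// synchronous mode it takes the result from result() directly.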
ObjectDetectionResult &ObjectDetection::getOutput()
{
	if (_async_manager) {
		if (!_async_manager->isWorking())
			throw InvalidOperation("Object detection has already been destroyed, so this is an invalid operation.");

		_current_result = _async_manager->pop();
	} else {
		// TODO. Check if inference request is completed or not here.
		//       If not then throw an exception.
		_current_result = result();
	}

	return _current_result;
}

ObjectDetectionResult &ObjectDetection::getOutputCache()
{
	return _current_result;
}

void ObjectDetection::getOutputNames(vector<string> &names)
{
	TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
	IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();

	for (IETensorBuffer::iterator it = ie_tensor_buffer.begin(); it != ie_tensor_buffer.end(); it++)
		names.push_back(it->first);
}

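// Copy the raw output tensor named target_name into the given float vector.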
void ObjectDetection::getOutputTensor(string target_name, vector<float> &tensor)
{
	TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();

	inference_engine_tensor_buffer *tensor_buffer = tensor_buffer_obj.getTensorBuffer(target_name);
	if (!tensor_buffer)
		throw InvalidOperation("Fail to get tensor buffer.");

	auto raw_buffer = static_cast<float *>(tensor_buffer->buffer);

	copy(&raw_buffer[0], &raw_buffer[tensor_buffer->size / sizeof(float)], back_inserter(tensor));
}

template void ObjectDetection::preprocess<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
												 vector<float> &inputVector);
template void ObjectDetection::inference<float>(vector<vector<float> > &inputVectors);
template void ObjectDetection::perform<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
template void ObjectDetection::performAsync<float>(ObjectDetectionInput &input, shared_ptr<MetaInfo> metaInfo);

template void ObjectDetection::preprocess<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
														  vector<unsigned char> &inputVector);
template void ObjectDetection::inference<unsigned char>(vector<vector<unsigned char> > &inputVectors);
template void ObjectDetection::perform<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
template void ObjectDetection::performAsync<unsigned char>(ObjectDetectionInput &input, shared_ptr<MetaInfo> metaInfo);

} // namespace machine_learning
} // namespace mediavision