mv_machine_learning: convert ObjectDetection class into template class
[platform/core/api/mediavision.git] / mv_machine_learning / object_detection / src / object_detection.cpp
/**
 * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string.h>
#include <fstream>
#include <map>
#include <memory>

#include "machine_learning_exception.h"
#include "mv_machine_learning_common.h"
#include "mv_object_detection_config.h"
#include "object_detection.h"

using namespace std;
using namespace mediavision::inference;
using namespace MediaVision::Common;
using namespace mediavision::common;
using namespace mediavision::machine_learning::exception;

namespace mediavision
{
namespace machine_learning
{
template<typename T>
ObjectDetection<T>::ObjectDetection(ObjectDetectionTaskType task_type, shared_ptr<MachineLearningConfig> config)
		: _task_type(task_type), _config(config)
{
        _inference = make_unique<Inference>();
}

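// Called before this task object is destroyed. If asynchronous inference was started,
// stop the worker owned by _async_manager so its callback no longer runs while the
// object is being torn down.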
template<typename T> void ObjectDetection<T>::preDestroy()
{
        if (!_async_manager)
                return;

        _async_manager->stop();
}

template<typename T> ObjectDetectionTaskType ObjectDetection<T>::getTaskType()
{
        return _task_type;
}

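// Collect the inference backends usable by this task into _valid_backends. Only
// backends reported as supported by the Inference module are considered, and for
// now only "tflite" is accepted (see the TODO below).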
template<typename T> void ObjectDetection<T>::getEngineList()
{
        for (auto idx = MV_INFERENCE_BACKEND_NONE + 1; idx < MV_INFERENCE_BACKEND_MAX; ++idx) {
                auto backend = _inference->getSupportedInferenceBackend(idx);
                // TODO. Describe which inference engines each Task API supports and, based on that,
                //       check whether a given engine type is supported by this Task API.
                //       As of now, tflite only.
                if (backend.second == true && backend.first.compare("tflite") == 0)
                        _valid_backends.push_back(backend.first);
        }
}

template<typename T> void ObjectDetection<T>::getDeviceList(const char *engine_type)
{
        // TODO. Add device types available for a given engine type later.
        //       By default, cpu and gpu only.
        _valid_devices.push_back("cpu");
        _valid_devices.push_back("gpu");
}

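// Apply the user-requested backend and target device by name (for example "tflite"
// and "cpu"). The names are upper-cased before being mapped to their enum values,
// which are then stored in the configuration used later by configure().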
template<typename T> void ObjectDetection<T>::setEngineInfo(std::string engine_type_name, std::string device_type_name)
{
        if (engine_type_name.empty() || device_type_name.empty())
                throw InvalidParameter("Invalid engine info.");

        transform(engine_type_name.begin(), engine_type_name.end(), engine_type_name.begin(), ::toupper);
        transform(device_type_name.begin(), device_type_name.end(), device_type_name.begin(), ::toupper);

        int engine_type = GetBackendType(engine_type_name);
        int device_type = GetDeviceType(device_type_name);

        if (engine_type == MEDIA_VISION_ERROR_INVALID_PARAMETER || device_type == MEDIA_VISION_ERROR_INVALID_PARAMETER)
                throw InvalidParameter("backend or target device type not found.");

        _config->setBackendType(engine_type);
        _config->setTargetDeviceType(device_type);

        LOGI("Engine type : %s => %d, Device type : %s => %d", engine_type_name.c_str(), engine_type,
                 device_type_name.c_str(), device_type);
}

template<typename T> void ObjectDetection<T>::getNumberOfEngines(unsigned int *number_of_engines)
{
        if (!_valid_backends.empty()) {
                *number_of_engines = _valid_backends.size();
                return;
        }

        getEngineList();
        *number_of_engines = _valid_backends.size();
}

template<typename T> void ObjectDetection<T>::getEngineType(unsigned int engine_index, char **engine_type)
{
        if (!_valid_backends.empty()) {
                if (_valid_backends.size() <= engine_index)
                        throw InvalidParameter("Invalid engine index.");

                *engine_type = const_cast<char *>(_valid_backends[engine_index].data());
                return;
        }

        getEngineList();

        if (_valid_backends.size() <= engine_index)
                throw InvalidParameter("Invalid engine index.");

        *engine_type = const_cast<char *>(_valid_backends[engine_index].data());
}

template<typename T>
void ObjectDetection<T>::getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices)
{
        if (!_valid_devices.empty()) {
                *number_of_devices = _valid_devices.size();
                return;
        }

        getDeviceList(engine_type);
        *number_of_devices = _valid_devices.size();
}

template<typename T>
void ObjectDetection<T>::getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type)
{
        if (!_valid_devices.empty()) {
                if (_valid_devices.size() <= device_index)
                        throw InvalidParameter("Invalid device index.");

                *device_type = const_cast<char *>(_valid_devices[device_index].data());
                return;
        }

        getDeviceList(engine_type);

        if (_valid_devices.size() <= device_index)
                throw InvalidParameter("Invalid device index.");

        *device_type = const_cast<char *>(_valid_devices[device_index].data());
}

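// Read the label file given by the configuration, one label per line, into _labels.
// Silently skipped when no label file path is configured.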
template<typename T> void ObjectDetection<T>::loadLabel()
{
        if (_config->getLabelFilePath().empty())
                return;

        ifstream readFile;

        _labels.clear();
        readFile.open(_config->getLabelFilePath().c_str());

        if (readFile.fail())
                throw InvalidOperation("Fail to open " + _config->getLabelFilePath() + " file.");

        string line;

        while (getline(readFile, line))
                _labels.push_back(line);

        readFile.close();
}

template<typename T> void ObjectDetection<T>::configure()
{
        loadLabel();

        int ret = _inference->bind(_config->getBackendType(), _config->getTargetDeviceType());
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to bind a backend engine.");
}

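// Configure input/output tensor information from the meta file and then ask the
// bound backend engine to load the model file. configure() must have been called
// first so that a backend engine is bound.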
template<typename T> void ObjectDetection<T>::prepare()
{
        int ret = _inference->configureInputMetaInfo(_config->getInputMetaMap());
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to configure input tensor info from meta file.");

        ret = _inference->configureOutputMetaInfo(_config->getOutputMetaMap());
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to configure output tensor info from meta file.");

        _inference->configureModelFiles("", _config->getModelFilePath(), "");

        // Request to load model files to a backend engine.
        ret = _inference->load();
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to load model files.");
}

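// Return the meta information of the model's single input tensor. Models with more
// than one input tensor are not supported yet, so any other count is rejected.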
template<typename T> shared_ptr<MetaInfo> ObjectDetection<T>::getInputMetaInfo()
{
        TensorBuffer &tensor_buffer = _inference->getInputTensorBuffer();
        IETensorBuffer &tensor_info_map = tensor_buffer.getIETensorBuffer();

        // TODO. consider using multiple tensors later.
        if (tensor_info_map.size() != 1)
                throw InvalidOperation("Invalid input tensor count.");

        auto tensor_buffer_iter = tensor_info_map.begin();

        // Get the meta information corresponding to a given input tensor name.
        return _config->getInputMetaMap()[tensor_buffer_iter->first];
}

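// Convert a given mv_source_h into the flat input vector expected by the model, using
// the color space, data type and tensor shape described by the meta file. Normalization
// and quantization parameters are applied when the meta file declares them.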
template<typename T>
void ObjectDetection<T>::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<T> &inputVector)
{
        LOGI("ENTER");

        PreprocessConfig config = { false,
                                    metaInfo->colorSpace,
                                    metaInfo->dataType,
                                    metaInfo->getChannel(),
                                    metaInfo->getWidth(),
                                    metaInfo->getHeight() };

        auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
        if (normalization) {
                config.normalize = normalization->use;
                config.mean = normalization->mean;
                config.std = normalization->std;
        }

        auto quantization =
                        static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
        if (quantization) {
                config.quantize = quantization->use;
                config.scale = quantization->scale;
                config.zeropoint = quantization->zeropoint;
        }

        _preprocess.setConfig(config);
        _preprocess.run<T>(mv_src, inputVector);

        LOGI("LEAVE");
}

template<typename T> void ObjectDetection<T>::inference(vector<vector<T> > &inputVectors)
{
        LOGI("ENTER");

        int ret = _inference->run<T>(inputVectors);
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to run inference");

        LOGI("LEAVE");
}

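// Synchronous single-image path: preprocess the given source and run inference on the
// calling thread. The decoded result is fetched separately through getOutput().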
template<typename T> void ObjectDetection<T>::perform(mv_source_h &mv_src)
{
        shared_ptr<MetaInfo> metaInfo = getInputMetaInfo();
        vector<T> inputVector;

        preprocess(mv_src, metaInfo, inputVector);

        vector<vector<T> > inputVectors = { inputVector };
        inference(inputVectors);
}

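// Asynchronous path: on the first call, create the async manager with a callback that
// pops preprocessed inputs from the input queue, runs inference, decodes the result
// via result() (expected to be provided by a concrete detection class) and pushes it
// to the output queue together with its frame number. Subsequent calls only
// preprocess the new frame and enqueue it.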
template<typename T> void ObjectDetection<T>::performAsync(ObjectDetectionInput &input)
{
        if (!_async_manager) {
                _async_manager = make_unique<AsyncManager<ObjectDetectionResult> >([this]() {
                        AsyncInputQueue<T> inputQueue = _async_manager->popFromInput<T>();

                        inference(inputQueue.inputs);

                        ObjectDetectionResult &resultQueue = result();

                        resultQueue.frame_number = inputQueue.frame_number;
                        _async_manager->pushToOutput(resultQueue);
                });
        }

        shared_ptr<MetaInfo> metaInfo = getInputMetaInfo();
        vector<T> inputVector;

        preprocess(input.inference_src, metaInfo, inputVector);

        vector<vector<T> > inputVectors = { inputVector };
        _async_manager->push(inputVectors);
}

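// Return the latest detection result. In asynchronous mode the result is popped from
// the async manager's output queue; in synchronous mode it is decoded from the last
// inference via result(). Either way the result is cached in _current_result.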
template<typename T> ObjectDetectionResult &ObjectDetection<T>::getOutput()
{
        if (_async_manager) {
                if (!_async_manager->isWorking())
                        throw InvalidOperation("Object detection has already been destroyed so this operation is invalid.");

                _current_result = _async_manager->pop();
        } else {
                // TODO. Check if inference request is completed or not here.
                //       If not then throw an exception.
                _current_result = result();
        }

        return _current_result;
}

template<typename T> ObjectDetectionResult &ObjectDetection<T>::getOutputCache()
{
        return _current_result;
}

template<typename T> void ObjectDetection<T>::getOutputNames(vector<string> &names)
{
        TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
        IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();

        for (IETensorBuffer::iterator it = ie_tensor_buffer.begin(); it != ie_tensor_buffer.end(); it++)
                names.push_back(it->first);
}

template<typename T> void ObjectDetection<T>::getOutputTensor(string target_name, vector<float> &tensor)
{
        TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();

        inference_engine_tensor_buffer *tensor_buffer = tensor_buffer_obj.getTensorBuffer(target_name);
        if (!tensor_buffer)
                throw InvalidOperation("Fail to get tensor buffer.");

        auto raw_buffer = static_cast<float *>(tensor_buffer->buffer);

        copy(&raw_buffer[0], &raw_buffer[tensor_buffer->size / sizeof(float)], back_inserter(tensor));
}

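// The template member definitions live in this translation unit, so the class must be
// instantiated explicitly for every input element type callers use: float for float32
// models and unsigned char for quantized (uint8) models.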
template class ObjectDetection<float>;
template class ObjectDetection<unsigned char>;

} // namespace machine_learning
} // namespace mediavision