8216b30c1db9b02867995990ed339a84b8482dd3
[platform/core/api/mediavision.git] mv_machine_learning/object_detection/src/object_detection.cpp
/**
 * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string.h>
#include <fstream>
#include <map>
#include <memory>

#include "machine_learning_exception.h"
#include "mv_machine_learning_common.h"
#include "mv_object_detection_config.h"
#include "object_detection.h"

using namespace std;
using namespace mediavision::inference;
using namespace MediaVision::Common;
using namespace mediavision::common;
using namespace mediavision::machine_learning::exception;

namespace mediavision
{
namespace machine_learning
{
ObjectDetection::ObjectDetection(ObjectDetectionTaskType task_type, shared_ptr<MachineLearningConfig> config)
		: _task_type(task_type), _config(config)
{
	_inference = make_unique<Inference>();
}

void ObjectDetection::preDestroy()
{
	if (!_async_manager)
		return;

	_async_manager->stop();
}

ObjectDetectionTaskType ObjectDetection::getTaskType()
{
	return _task_type;
}

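// Populate _valid_backends with the names of the inference engines that can serve this task.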
void ObjectDetection::getEngineList()
{
	for (auto idx = MV_INFERENCE_BACKEND_NONE + 1; idx < MV_INFERENCE_BACKEND_MAX; ++idx) {
		auto backend = _inference->getSupportedInferenceBackend(idx);
		// TODO. Describe which inference engines each Task API supports and, based on that,
		//       check whether a given engine type is supported by this Task API.
		//       As of now, only tflite is supported.
		if (backend.second && backend.first == "tflite")
			_valid_backends.push_back(backend.first);
	}
}

void ObjectDetection::getDeviceList(const char *engine_type)
{
	// TODO. Add the device types available for a given engine type later.
	//       By default, cpu and gpu only.
	_valid_devices.push_back("cpu");
	_valid_devices.push_back("gpu");
}

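// Map the given engine and device names (case-insensitively) to backend and device
// type values and store them in the configuration.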
void ObjectDetection::setEngineInfo(std::string engine_type_name, std::string device_type_name)
{
	if (engine_type_name.empty() || device_type_name.empty())
		throw InvalidParameter("Invalid engine info.");

	transform(engine_type_name.begin(), engine_type_name.end(), engine_type_name.begin(), ::toupper);
	transform(device_type_name.begin(), device_type_name.end(), device_type_name.begin(), ::toupper);

	int engine_type = GetBackendType(engine_type_name);
	int device_type = GetDeviceType(device_type_name);

	if (engine_type == MEDIA_VISION_ERROR_INVALID_PARAMETER || device_type == MEDIA_VISION_ERROR_INVALID_PARAMETER)
		throw InvalidParameter("backend or target device type not found.");

	_config->setBackendType(engine_type);
	_config->setTargetDeviceType(device_type);

	LOGI("Engine type : %s => %d, Device type : %s => %d", engine_type_name.c_str(), engine_type,
		 device_type_name.c_str(), device_type);
}

void ObjectDetection::getNumberOfEngines(unsigned int *number_of_engines)
{
	if (!_valid_backends.empty()) {
		*number_of_engines = _valid_backends.size();
		return;
	}

	getEngineList();
	*number_of_engines = _valid_backends.size();
}

void ObjectDetection::getEngineType(unsigned int engine_index, char **engine_type)
{
	if (!_valid_backends.empty()) {
		if (_valid_backends.size() <= engine_index)
			throw InvalidParameter("Invalid engine index.");

		*engine_type = const_cast<char *>(_valid_backends[engine_index].data());
		return;
	}

	getEngineList();

	if (_valid_backends.size() <= engine_index)
		throw InvalidParameter("Invalid engine index.");

	*engine_type = const_cast<char *>(_valid_backends[engine_index].data());
}

void ObjectDetection::getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices)
{
	if (!_valid_devices.empty()) {
		*number_of_devices = _valid_devices.size();
		return;
	}

	getDeviceList(engine_type);
	*number_of_devices = _valid_devices.size();
}

void ObjectDetection::getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type)
{
	if (!_valid_devices.empty()) {
		if (_valid_devices.size() <= device_index)
			throw InvalidParameter("Invalid device index.");

		*device_type = const_cast<char *>(_valid_devices[device_index].data());
		return;
	}

	getDeviceList(engine_type);

	if (_valid_devices.size() <= device_index)
		throw InvalidParameter("Invalid device index.");

	*device_type = const_cast<char *>(_valid_devices[device_index].data());
}

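// Load the label file configured in the meta data into _labels, one label per line.
// Nothing is loaded when no label file path is set.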
void ObjectDetection::loadLabel()
{
	if (_config->getLabelFilePath().empty())
		return;

	ifstream readFile;

	_labels.clear();
	readFile.open(_config->getLabelFilePath().c_str());

	if (readFile.fail())
		throw InvalidOperation("Fail to open " + _config->getLabelFilePath() + " file.");

	string line;

	while (getline(readFile, line))
		_labels.push_back(line);

	readFile.close();
}

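// Parse the meta file for this task type, load the label file if one is configured,
// and bind the configured backend engine and target device.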
void ObjectDetection::configure()
{
	_config->loadMetaFile(make_unique<ObjectDetectionParser>(static_cast<int>(_task_type)));
	loadLabel();

	int ret = _inference->bind(_config->getBackendType(), _config->getTargetDeviceType());
	if (ret != MEDIA_VISION_ERROR_NONE)
		throw InvalidOperation("Fail to bind a backend engine.");
}

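// Hand the input and output tensor information from the meta file over to the
// inference engine, set the model file and request the engine to load it.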
void ObjectDetection::prepare()
{
	int ret = _inference->configureInputMetaInfo(_config->getInputMetaMap());
	if (ret != MEDIA_VISION_ERROR_NONE)
		throw InvalidOperation("Fail to configure input tensor info from meta file.");

	ret = _inference->configureOutputMetaInfo(_config->getOutputMetaMap());
	if (ret != MEDIA_VISION_ERROR_NONE)
		throw InvalidOperation("Fail to configure output tensor info from meta file.");

	_inference->configureModelFiles("", _config->getModelFilePath(), "");

	// Request to load model files to a backend engine.
	ret = _inference->load();
	if (ret != MEDIA_VISION_ERROR_NONE)
		throw InvalidOperation("Fail to load model files.");
}

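// Return the meta information registered for the model's single input tensor.
// Models with more than one input tensor are not supported yet.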
shared_ptr<MetaInfo> ObjectDetection::getInputMetaInfo()
{
	TensorBuffer &tensor_buffer = _inference->getInputTensorBuffer();
	IETensorBuffer &tensor_info_map = tensor_buffer.getIETensorBuffer();

	// TODO. consider using multiple tensors later.
	if (tensor_info_map.size() != 1)
		throw InvalidOperation("Invalid input tensor count.");

	auto tensor_buffer_iter = tensor_info_map.begin();

	// Get the meta information corresponding to a given input tensor name.
	return _config->getInputMetaMap()[tensor_buffer_iter->first];
}

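// Build a PreprocessConfig from the input tensor meta information (color space, data
// type, channel count, width, height) plus the optional normalization and quantization
// parameters, then run preprocessing on the given mv_source_h to fill inputVector.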
template<typename T>
void ObjectDetection::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<T> &inputVector)
{
	LOGI("ENTER");

	PreprocessConfig config = { false,
								metaInfo->colorSpace,
								metaInfo->dataType,
								metaInfo->getChannel(),
								metaInfo->getWidth(),
								metaInfo->getHeight() };

	auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
	if (normalization) {
		config.normalize = normalization->use;
		config.mean = normalization->mean;
		config.std = normalization->std;
	}

	auto quantization =
			static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
	if (quantization) {
		config.quantize = quantization->use;
		config.scale = quantization->scale;
		config.zeropoint = quantization->zeropoint;
	}

	_preprocess.setConfig(config);
	_preprocess.run<T>(mv_src, inputVector);

	LOGI("LEAVE");
}

template<typename T> void ObjectDetection::inference(vector<vector<T> > &inputVectors)
{
	LOGI("ENTER");

	int ret = _inference->run<T>(inputVectors);
	if (ret != MEDIA_VISION_ERROR_NONE)
		throw InvalidOperation("Fail to run inference.");

	LOGI("LEAVE");
}

template<typename T> void ObjectDetection::perform(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo)
{
	vector<T> inputVector;

	preprocess<T>(mv_src, metaInfo, inputVector);

	vector<vector<T> > inputVectors = { inputVector };

	inference<T>(inputVectors);

	// TODO. Update operation status here.
}

void ObjectDetection::perform(mv_source_h &mv_src)
{
	shared_ptr<MetaInfo> metaInfo = getInputMetaInfo();
	if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8)
		perform<unsigned char>(mv_src, metaInfo);
	else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32)
		perform<float>(mv_src, metaInfo);
	else
		throw InvalidOperation("Invalid model data type.");
}

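// Asynchronous inference path. On the first call this creates an AsyncManager whose
// worker repeatedly pops a preprocessed entry from the input queue, runs inference on
// it, stamps the result from result() with the entry's frame number and pushes it to
// the output queue. Every call preprocesses the given frame and pushes it to the
// input queue.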
template<typename T> void ObjectDetection::performAsync(ObjectDetectionInput &input, shared_ptr<MetaInfo> metaInfo)
{
	if (!_async_manager) {
		_async_manager = make_unique<AsyncManager<ObjectDetectionResult> >([this]() {
			AsyncInputQueue<T> inputQueue = _async_manager->popFromInput<T>();

			inference<T>(inputQueue.inputs);

			ObjectDetectionResult &resultQueue = result();

			resultQueue.frame_number = inputQueue.frame_number;
			_async_manager->pushToOutput(resultQueue);
		});
	}

	vector<T> inputVector;

	preprocess<T>(input.inference_src, metaInfo, inputVector);

	vector<vector<T> > inputVectors = { inputVector };

	_async_manager->push(inputVectors);
}

void ObjectDetection::performAsync(ObjectDetectionInput &input)
{
	shared_ptr<MetaInfo> metaInfo = getInputMetaInfo();

	if (metaInfo->dataType == MV_INFERENCE_DATA_UINT8) {
		performAsync<unsigned char>(input, metaInfo);
	} else if (metaInfo->dataType == MV_INFERENCE_DATA_FLOAT32) {
		performAsync<float>(input, metaInfo);
		// TODO
	} else {
		throw InvalidOperation("Invalid model data type.");
	}
}

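// Return a detection result. In asynchronous mode this pops the next result produced
// by the AsyncManager worker and fails if the manager is no longer working; in
// synchronous mode it takes the result from result() directly.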
ObjectDetectionResult &ObjectDetection::getOutput()
{
	if (_async_manager) {
		if (!_async_manager->isWorking())
			throw InvalidOperation("Object detection has already been destroyed, so this is an invalid operation.");

		_current_result = _async_manager->pop();
	} else {
		// TODO. Check if inference request is completed or not here.
		//       If not then throw an exception.
		_current_result = result();
	}

	return _current_result;
}

ObjectDetectionResult &ObjectDetection::getOutputCache()
{
	return _current_result;
}

void ObjectDetection::getOutputNames(vector<string> &names)
{
	TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
	IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();

	for (IETensorBuffer::iterator it = ie_tensor_buffer.begin(); it != ie_tensor_buffer.end(); it++)
		names.push_back(it->first);
}

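// Copy the raw output tensor named target_name into the given float vector.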
void ObjectDetection::getOutputTensor(string target_name, vector<float> &tensor)
{
	TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();

	inference_engine_tensor_buffer *tensor_buffer = tensor_buffer_obj.getTensorBuffer(target_name);
	if (!tensor_buffer)
		throw InvalidOperation("Fail to get tensor buffer.");

	auto raw_buffer = static_cast<float *>(tensor_buffer->buffer);

	copy(&raw_buffer[0], &raw_buffer[tensor_buffer->size / sizeof(float)], back_inserter(tensor));
}

template void ObjectDetection::preprocess<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
												 vector<float> &inputVector);
template void ObjectDetection::inference<float>(vector<vector<float> > &inputVectors);
template void ObjectDetection::perform<float>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
template void ObjectDetection::performAsync<float>(ObjectDetectionInput &input, shared_ptr<MetaInfo> metaInfo);

template void ObjectDetection::preprocess<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo,
														  vector<unsigned char> &inputVector);
template void ObjectDetection::inference<unsigned char>(vector<vector<unsigned char> > &inputVectors);
template void ObjectDetection::perform<unsigned char>(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo);
template void ObjectDetection::performAsync<unsigned char>(ObjectDetectionInput &input, shared_ptr<MetaInfo> metaInfo);

} // namespace machine_learning
} // namespace mediavision