mv_machine_learning: convert ObjectDetection class into template class
[platform/core/api/mediavision.git] / mv_machine_learning / object_detection / src / object_detection.cpp
/**
 * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string.h>
#include <fstream>
#include <map>
#include <memory>

#include "machine_learning_exception.h"
#include "mv_machine_learning_common.h"
#include "mv_object_detection_config.h"
#include "object_detection.h"

using namespace std;
using namespace mediavision::inference;
using namespace MediaVision::Common;
using namespace mediavision::common;
using namespace mediavision::machine_learning::exception;

namespace mediavision
{
namespace machine_learning
{
template<typename T>
ObjectDetection<T>::ObjectDetection(ObjectDetectionTaskType task_type, shared_ptr<MachineLearningConfig> config)
		: _task_type(task_type), _config(config)
{
        _inference = make_unique<Inference>();
}

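// Called before this task object is destroyed. If asynchronous inference was started,
// stop the worker owned by _async_manager so its callback no longer runs while the
// object is being torn down.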
template<typename T> void ObjectDetection<T>::preDestroy()
{
        if (!_async_manager)
                return;

        _async_manager->stop();
}

template<typename T> ObjectDetectionTaskType ObjectDetection<T>::getTaskType()
{
        return _task_type;
}

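// Collect the inference backends usable by this task into _valid_backends. Only
// backends reported as supported by the Inference module are considered, and for
// now only "tflite" is accepted (see the TODO below).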
template<typename T> void ObjectDetection<T>::getEngineList()
{
        for (auto idx = MV_INFERENCE_BACKEND_NONE + 1; idx < MV_INFERENCE_BACKEND_MAX; ++idx) {
                auto backend = _inference->getSupportedInferenceBackend(idx);
                // TODO. Describe which inference engines each Task API supports and, based on that,
                //       check whether a given engine type is supported by this Task API.
                //       As of now, tflite only.
                if (backend.second == true && backend.first.compare("tflite") == 0)
                        _valid_backends.push_back(backend.first);
        }
}

template<typename T> void ObjectDetection<T>::getDeviceList(const char *engine_type)
{
        // TODO. Add device types available for a given engine type later.
        //       By default, cpu and gpu only.
        _valid_devices.push_back("cpu");
        _valid_devices.push_back("gpu");
}

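// Apply the user-requested backend and target device by name (for example "tflite"
// and "cpu"). The names are upper-cased before being mapped to their enum values,
// which are then stored in the configuration used later by configure().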
template<typename T> void ObjectDetection<T>::setEngineInfo(std::string engine_type_name, std::string device_type_name)
{
        if (engine_type_name.empty() || device_type_name.empty())
                throw InvalidParameter("Invalid engine info.");

        transform(engine_type_name.begin(), engine_type_name.end(), engine_type_name.begin(), ::toupper);
        transform(device_type_name.begin(), device_type_name.end(), device_type_name.begin(), ::toupper);

        int engine_type = GetBackendType(engine_type_name);
        int device_type = GetDeviceType(device_type_name);

        if (engine_type == MEDIA_VISION_ERROR_INVALID_PARAMETER || device_type == MEDIA_VISION_ERROR_INVALID_PARAMETER)
                throw InvalidParameter("backend or target device type not found.");

        _config->setBackendType(engine_type);
        _config->setTargetDeviceType(device_type);

        LOGI("Engine type : %s => %d, Device type : %s => %d", engine_type_name.c_str(), engine_type,
                 device_type_name.c_str(), device_type);
}

template<typename T> void ObjectDetection<T>::getNumberOfEngines(unsigned int *number_of_engines)
{
        if (!_valid_backends.empty()) {
                *number_of_engines = _valid_backends.size();
                return;
        }

        getEngineList();
        *number_of_engines = _valid_backends.size();
}

template<typename T> void ObjectDetection<T>::getEngineType(unsigned int engine_index, char **engine_type)
{
        if (!_valid_backends.empty()) {
                if (_valid_backends.size() <= engine_index)
                        throw InvalidParameter("Invalid engine index.");

                *engine_type = const_cast<char *>(_valid_backends[engine_index].data());
                return;
        }

        getEngineList();

        if (_valid_backends.size() <= engine_index)
                throw InvalidParameter("Invalid engine index.");

        *engine_type = const_cast<char *>(_valid_backends[engine_index].data());
}

template<typename T>
void ObjectDetection<T>::getNumberOfDevices(const char *engine_type, unsigned int *number_of_devices)
{
        if (!_valid_devices.empty()) {
                *number_of_devices = _valid_devices.size();
                return;
        }

        getDeviceList(engine_type);
        *number_of_devices = _valid_devices.size();
}

template<typename T>
void ObjectDetection<T>::getDeviceType(const char *engine_type, const unsigned int device_index, char **device_type)
{
        if (!_valid_devices.empty()) {
                if (_valid_devices.size() <= device_index)
                        throw InvalidParameter("Invalid device index.");

                *device_type = const_cast<char *>(_valid_devices[device_index].data());
                return;
        }

        getDeviceList(engine_type);

        if (_valid_devices.size() <= device_index)
                throw InvalidParameter("Invalid device index.");

        *device_type = const_cast<char *>(_valid_devices[device_index].data());
}

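// Read the label file given by the configuration, one label per line, into _labels.
// Silently skipped when no label file path is configured.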
template<typename T> void ObjectDetection<T>::loadLabel()
{
        if (_config->getLabelFilePath().empty())
                return;

        ifstream readFile;

        _labels.clear();
        readFile.open(_config->getLabelFilePath().c_str());

        if (readFile.fail())
                throw InvalidOperation("Fail to open " + _config->getLabelFilePath() + " file.");

        string line;

        while (getline(readFile, line))
                _labels.push_back(line);

        readFile.close();
}

template<typename T> void ObjectDetection<T>::configure()
{
        loadLabel();

        int ret = _inference->bind(_config->getBackendType(), _config->getTargetDeviceType());
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to bind a backend engine.");
}

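// Configure input/output tensor information from the meta file and then ask the
// bound backend engine to load the model file. configure() must have been called
// first so that a backend engine is bound.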
template<typename T> void ObjectDetection<T>::prepare()
{
        int ret = _inference->configureInputMetaInfo(_config->getInputMetaMap());
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to configure input tensor info from meta file.");

        ret = _inference->configureOutputMetaInfo(_config->getOutputMetaMap());
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to configure output tensor info from meta file.");

        _inference->configureModelFiles("", _config->getModelFilePath(), "");

        // Request to load model files to a backend engine.
        ret = _inference->load();
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to load model files.");
}

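// Return the meta information of the model's single input tensor. Models with more
// than one input tensor are not supported yet, so any other count is rejected.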
template<typename T> shared_ptr<MetaInfo> ObjectDetection<T>::getInputMetaInfo()
{
        TensorBuffer &tensor_buffer = _inference->getInputTensorBuffer();
        IETensorBuffer &tensor_info_map = tensor_buffer.getIETensorBuffer();

        // TODO. consider using multiple tensors later.
        if (tensor_info_map.size() != 1)
                throw InvalidOperation("Invalid input tensor count.");

        auto tensor_buffer_iter = tensor_info_map.begin();

        // Get the meta information corresponding to a given input tensor name.
        return _config->getInputMetaMap()[tensor_buffer_iter->first];
}

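// Convert a given mv_source_h into the flat input vector expected by the model, using
// the color space, data type and tensor shape described by the meta file. Normalization
// and quantization parameters are applied when the meta file declares them.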
template<typename T>
void ObjectDetection<T>::preprocess(mv_source_h &mv_src, shared_ptr<MetaInfo> metaInfo, vector<T> &inputVector)
{
        LOGI("ENTER");

        PreprocessConfig config = { false,
                                    metaInfo->colorSpace,
                                    metaInfo->dataType,
                                    metaInfo->getChannel(),
                                    metaInfo->getWidth(),
                                    metaInfo->getHeight() };

        auto normalization = static_pointer_cast<DecodingNormal>(metaInfo->decodingTypeMap.at(DecodingType::NORMAL));
        if (normalization) {
                config.normalize = normalization->use;
                config.mean = normalization->mean;
                config.std = normalization->std;
        }

        auto quantization =
                        static_pointer_cast<DecodingQuantization>(metaInfo->decodingTypeMap.at(DecodingType::QUANTIZATION));
        if (quantization) {
                config.quantize = quantization->use;
                config.scale = quantization->scale;
                config.zeropoint = quantization->zeropoint;
        }

        _preprocess.setConfig(config);
        _preprocess.run<T>(mv_src, inputVector);

        LOGI("LEAVE");
}

template<typename T> void ObjectDetection<T>::inference(vector<vector<T> > &inputVectors)
{
        LOGI("ENTER");

        int ret = _inference->run<T>(inputVectors);
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to run inference");

        LOGI("LEAVE");
}

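// Synchronous single-image path: preprocess the given source and run inference on the
// calling thread. The decoded result is fetched separately through getOutput().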
template<typename T> void ObjectDetection<T>::perform(mv_source_h &mv_src)
{
        shared_ptr<MetaInfo> metaInfo = getInputMetaInfo();
        vector<T> inputVector;

        preprocess(mv_src, metaInfo, inputVector);

        vector<vector<T> > inputVectors = { inputVector };
        inference(inputVectors);
}

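// Asynchronous path: on the first call, create the async manager with a callback that
// pops preprocessed inputs from the input queue, runs inference, decodes the result
// via result() (expected to be provided by a concrete detection class) and pushes it
// to the output queue together with its frame number. Subsequent calls only
// preprocess the new frame and enqueue it.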
template<typename T> void ObjectDetection<T>::performAsync(ObjectDetectionInput &input)
{
        if (!_async_manager) {
                _async_manager = make_unique<AsyncManager<ObjectDetectionResult> >([this]() {
                        AsyncInputQueue<T> inputQueue = _async_manager->popFromInput<T>();

                        inference(inputQueue.inputs);

                        ObjectDetectionResult &resultQueue = result();

                        resultQueue.frame_number = inputQueue.frame_number;
                        _async_manager->pushToOutput(resultQueue);
                });
        }

        shared_ptr<MetaInfo> metaInfo = getInputMetaInfo();
        vector<T> inputVector;

        preprocess(input.inference_src, metaInfo, inputVector);

        vector<vector<T> > inputVectors = { inputVector };
        _async_manager->push(inputVectors);
}

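// Return the latest detection result. In asynchronous mode the result is popped from
// the async manager's output queue; in synchronous mode it is decoded from the last
// inference via result(). Either way the result is cached in _current_result.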
template<typename T> ObjectDetectionResult &ObjectDetection<T>::getOutput()
{
        if (_async_manager) {
                if (!_async_manager->isWorking())
                        throw InvalidOperation("Object detection has already been destroyed so this operation is invalid.");

                _current_result = _async_manager->pop();
        } else {
                // TODO. Check if inference request is completed or not here.
                //       If not then throw an exception.
                _current_result = result();
        }

        return _current_result;
}

template<typename T> ObjectDetectionResult &ObjectDetection<T>::getOutputCache()
{
        return _current_result;
}

template<typename T> void ObjectDetection<T>::getOutputNames(vector<string> &names)
{
        TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();
        IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();

        for (IETensorBuffer::iterator it = ie_tensor_buffer.begin(); it != ie_tensor_buffer.end(); it++)
                names.push_back(it->first);
}

template<typename T> void ObjectDetection<T>::getOutputTensor(string target_name, vector<float> &tensor)
{
        TensorBuffer &tensor_buffer_obj = _inference->getOutputTensorBuffer();

        inference_engine_tensor_buffer *tensor_buffer = tensor_buffer_obj.getTensorBuffer(target_name);
        if (!tensor_buffer)
                throw InvalidOperation("Fail to get tensor buffer.");

        auto raw_buffer = static_cast<float *>(tensor_buffer->buffer);

        copy(&raw_buffer[0], &raw_buffer[tensor_buffer->size / sizeof(float)], back_inserter(tensor));
}

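// The template member definitions live in this translation unit, so the class must be
// instantiated explicitly for every input element type callers use: float for float32
// models and unsigned char for quantized (uint8) models.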
template class ObjectDetection<float>;
template class ObjectDetection<unsigned char>;

} // namespace machine_learning
} // namespace mediavision