mv_machine_learning: use camel notation
[platform/core/api/mediavision.git] / mv_machine_learning / face_recognition / src / face_recognition.cpp
1 /**
2  * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <string.h>
18 #include <fstream>
19 #include <istream>
20 #include <tuple>
21 #include <map>
22 #include <algorithm>
23
24 #include <sys/stat.h>
25
26 #include <dlog.h>
27 #include <mv_common.h>
28 #include <mv_private.h>
29
30 #include "machine_learning_exception.h"
31 #include "TensorBuffer.h"
32 #include "face_recognition.h"
33 #include "face_recognition_fvm.h"
34 #include "face_recognition_dsm.h"
35 #include "file_util.h"
36
37 using namespace std;
38 using namespace mediavision::inference;
39 using namespace TrainingEngineInterface::Common;
40 using namespace mediavision::machine_learning::face_recognition::status;
41 using namespace mediavision::machine_learning::exception;
42
43 namespace mediavision
44 {
45 namespace machine_learning
46 {
47 FaceRecognition::FaceRecognition()
48                 : _status(NONE)
49                 , _internal()
50                 , _backbone()
51                 , _backbone_model_info()
52                 , _training_model()
53                 , _label_manager()
54                 , _config()
55                 , _result()
56 {}
57
58 FaceRecognition::~FaceRecognition()
59 {
60         if (_label_manager)
61                 _label_manager->clear();
62 }
63
64 void FaceRecognition::checkFeatureVectorFile(string fv_file_name, string new_fv_file_name)
65 {
66         // Change new feature vector file to existing one in case that current process is terminated just after removing existing feature vector file but
67         // new feature vector file isn't changed to existing one yet.
68         if (FaceRecogUtil::isFileExist(new_fv_file_name) && !FaceRecogUtil::isFileExist(fv_file_name)) {
69                 int ret = ::rename(new_fv_file_name.c_str(), fv_file_name.c_str());
70                 if (ret)
71                         throw InvalidOperation("Fail to rename new feature vector file to original one.");
72
73                 return;
74         }
75
76         // Make sure to remove a temp file in case that current process is terminated just after generating new feature vector file
77         // which is not correct file but existing one isn't removed. In this case, existing file is used again.
78         if (FaceRecogUtil::isFileExist(new_fv_file_name)) {
79                 int ret = ::remove(new_fv_file_name.c_str());
80                 if (ret)
81                         throw InvalidOperation("Fail to remove new feature vector file.");
82         }
83 }
84
85 void FaceRecognition::storeDataSet(unique_ptr<DataSetManager> &data_set, unsigned int label_cnt)
86 {
87         try {
88                 unique_ptr<FeatureVectorManager> fvm = make_unique<FaceRecognitionFVM>(_config.feature_vector_file_path);
89                 unique_ptr<FeatureVectorManager> fvm_new =
90                                 make_unique<FaceRecognitionFVM>(_config.feature_vector_file_path + ".new");
91
92                 // Make sure feature vector file.
93                 checkFeatureVectorFile(fvm->getFileName(), fvm_new->getFileName());
94
95                 // 1. Write feature vector and it's label index.
96                 fvm_new->storeData(data_set->getData(), data_set->getLabelIdx());
97
98                 // 2. Write feature vector header.
99                 fvm_new->writeHeader(data_set->getFeaVecSize(), label_cnt, data_set->getData().size());
100
101                 int ret = 0;
102
103                 // 3. Change new data file to existing one.
104                 if (FaceRecogUtil::isFileExist(fvm->getFileName())) {
105                         ret = ::remove(fvm->getFileName().c_str());
106                         if (ret)
107                                 throw InvalidOperation("Fail to remove feature vector file.");
108                 }
109
110                 ret = ::rename(fvm_new->getFileName().c_str(), fvm->getFileName().c_str());
111                 if (ret)
112                         throw InvalidOperation("Fail to rename new feature vector file to original one.");
113         } catch (const BaseException &e) {
114                 LOGE("%s", e.what());
115                 throw e;
116         }
117 }
118
119 void FaceRecognition::setConfig(FaceRecognitionConfig &config)
120 {
121         _config = config;
122 }
123
124 std::vector<model_layer_info> &FaceRecognition::getBackboneInputLayerInfo()
125 {
126         return _backbone_model_info->getInputLayerInfo();
127 }
128
129 int FaceRecognition::getVecFromMvSource(mv_source_h img_src, std::vector<float> &out_vec)
130 {
131         mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID;
132         unsigned int width = 0, height = 0, bufferSize = 0;
133         unsigned char *buffer = NULL;
134
135         if (mv_source_get_width(img_src, &width) != MEDIA_VISION_ERROR_NONE ||
136                 mv_source_get_height(img_src, &height) != MEDIA_VISION_ERROR_NONE ||
137                 mv_source_get_colorspace(img_src, &colorspace) != MEDIA_VISION_ERROR_NONE ||
138                 mv_source_get_buffer(img_src, &buffer, &bufferSize))
139                 return MEDIA_VISION_ERROR_INVALID_PARAMETER;
140
141         // TODO. Let's support various color spaces.
142
143         if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) {
144                 LOGE("Not Supported format!\n");
145                 return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
146         }
147
148         vector<model_layer_info> &input_layer_info = getBackboneInputLayerInfo();
149         // TODO. consider multiple tensor info.
150         size_t re_width = input_layer_info[0].tensor_info.shape[0];
151         size_t re_height = input_layer_info[0].tensor_info.shape[1];
152
153         LOGD("Convert mv source(WxH) : %d x %d => %zu x %zu", width, height, re_width, re_height);
154
155         FeatureVectorManager::getVecFromRGB(buffer, out_vec, width, height, re_width, re_height);
156
157         return MEDIA_VISION_ERROR_NONE;
158 }
159
160 int FaceRecognition::initialize()
161 {
162         _training_model = make_unique<SimpleShot>(_config.training_engine_backend_type, _config.training_target_device_type,
163                                                                                           _config.input_tensor_shape, _config.internal_model_file_path);
164
165         _internal = make_unique<Inference>();
166
167         int ret = _internal->bind(_config.inference_engine_backend_type, _config.inference_target_device_type);
168         if (ret != MEDIA_VISION_ERROR_NONE)
169                 return ret;
170
171         _status = INITIALIZED;
172
173         return MEDIA_VISION_ERROR_NONE;
174 }
175
176 void FaceRecognition::importLabel()
177 {
178         try {
179                 // Prepare can be called several times after initialization is done so previous data should be dropped.
180                 _label_manager.reset();
181                 _label_manager = make_unique<LabelManager>(_config.label_file_path, _config.decision_threshold);
182
183                 // Update label manager from a given label file.
184                 int cnt = _label_manager->importLabel();
185
186                 LOGD("%d labels have been imported", cnt);
187         } catch (const BaseException &e) {
188                 LOGE("%s", e.what());
189                 throw e;
190         }
191 }
192
193 int FaceRecognition::registerNewFace(std::vector<float> &input_vec, string label_name)
194 {
195         if (_status < INITIALIZED) {
196                 LOGE("Initialization not ready yet. (%u)", _status);
197                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
198         }
199
200         // TODO. consider data augmentation.
201         try {
202                 // Import label data from a label file.
203                 importLabel();
204
205                 // 1. Store only label names to label file, which aren't duplicated.
206                 bool duplicated = _label_manager->addLabelToMap(label_name, label_name);
207                 if (!duplicated) {
208                         int ret = _label_manager->addLabelToFile(label_name);
209                         if (ret == 0)
210                                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
211                 }
212
213                 // Get label index and count.
214                 unsigned int label_idx = _label_manager->getLabelIndex(label_name);
215                 unsigned int label_cnt = _label_manager->getMaxLabel();
216
217                 _training_model->configureModel(label_cnt);
218
219                 unique_ptr<DataSetManager> data_set = make_unique<FaceRecognitionDSM>();
220
221                 data_set->clear();
222
223                 // Load existing feature vectors if the feature vector file exists.
224                 if (FaceRecogUtil::isFileExist(_config.feature_vector_file_path) == true) {
225                         LOGI("feature vector file already exists so it loads the file first.");
226                         data_set->loadDataSet(_config.feature_vector_file_path, label_cnt);
227                 }
228
229                 // Add new feature vector only in case that feature vector count of given label_idx is less then 5.
230                 // It means that only 5 set of feature vector per a label is valid.
231                 // TODO. According to feature vector priority, new feature vector should be added.
232                 if (data_set->isFeatureVectorAllowed(label_idx))
233                         data_set->addDataSet(input_vec, label_idx, label_cnt);
234
235                 _training_model->applyDataSet(data_set);
236                 _training_model->compile();
237                 _training_model->train();
238
239                 // TODO. apply feature vector priority policy here.
240                 // We can get weight trained from training engine.
241                 // _training_model->getWeights(&weights, &size, "centroid_knn1");
242
243                 // Store dataset to feature vector file.
244                 storeDataSet(data_set, label_cnt);
245
246                 // label_cnt can be changed every time the training is performed and all data set will be used for the training
247                 // again in this case. So make sure to clear previous data set before next training.
248                 _training_model->clearDataSet(data_set);
249                 _status = REGISTERED;
250         } catch (const BaseException &e) {
251                 LOGE("%s", e.what());
252                 return e.getError();
253         }
254
255         return MEDIA_VISION_ERROR_NONE;
256 }
257
258 int FaceRecognition::getAnswer()
259 {
260         int answer_idx;
261
262         string result_str;
263
264         try {
265                 for (auto &r : _result.raw_data)
266                         result_str += to_string(r) + " ";
267
268                 LOGD("raw data = %s", result_str.c_str());
269
270                 answer_idx = max_element(_result.raw_data.begin(), _result.raw_data.end()) - _result.raw_data.begin();
271
272                 // Check decision threshold.
273                 if (_result.raw_data[answer_idx] < _label_manager->getDecisionThreshold()) {
274                         throw NoData("Not meet decision threshold.");
275                 }
276
277                 float weighted = _result.raw_data[answer_idx] * _label_manager->getDecisionWeight();
278
279                 // Check decision weight threshold.
280                 for (auto &r : _result.raw_data) {
281                         if (_result.raw_data[answer_idx] == r)
282                                 continue;
283
284                         if (weighted < r)
285                                 throw NoData("Not meet decision weight threshold");
286                 }
287
288                 _result.label_idx = answer_idx;
289                 _result.is_valid = true;
290         } catch (const BaseException &e) {
291                 LOGE("%s", e.what());
292                 return e.getError();
293         }
294
295         return MEDIA_VISION_ERROR_NONE;
296 }
297
298 int FaceRecognition::recognizeFace(std::vector<float> &input_vec)
299 {
300         if (_status < INITIALIZED) {
301                 LOGE("Initialization not ready yet.(%u)", _status);
302                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
303         }
304
305         if (!FaceRecogUtil::isFileExist(_config.internal_model_file_path)) {
306                 LOGE("Internal model file(%s) doesn't exist.", _config.internal_model_file_path.c_str());
307                 return MEDIA_VISION_ERROR_INVALID_PATH;
308         }
309
310         TrainingEngineBackendInfo engine_info = _training_model->getTrainingEngineInfo();
311         vector<string> &input_layers = engine_info.input_layer_names;
312         vector<inference_engine_tensor_info> &input_tensor_info = engine_info.input_tensor_info;
313         vector<string> &output_layers = engine_info.output_layer_names;
314         vector<inference_engine_tensor_info> &output_tensor_info = engine_info.output_tensor_info;
315
316         // Face Recognition has following steps
317         // ------------------------------------
318         // 1. Import label data to in-memory from a file.
319         // 2. Load internal model.
320         // 3. Do an inference with the internal model to get a label.
321         // 4. Get the label best suitable to the output tensor and return the label.
322
323         try {
324                 // Import label data from a label file.
325                 importLabel();
326
327                 // Tensor order is NCHW.
328                 size_t width = input_tensor_info[0].shape[0];
329                 size_t height = input_tensor_info[0].shape[1];
330                 size_t ch = input_tensor_info[0].shape[2];
331
332                 _internal->configureInputInfo(width, height, 1, ch, 1.0f, 0.0f, MV_INFERENCE_DATA_FLOAT32, input_layers);
333                 // Output tensor size should be a number of labels so update it.
334                 output_tensor_info[0].shape[0] = _label_manager->getMaxLabel();
335                 _internal->configureOutputInfo(output_layers, output_tensor_info);
336
337                 _internal->configureModelFiles("", _config.internal_model_file_path, "");
338
339                 // Load the trained internal model.
340                 int ret = _internal->load();
341                 if (ret != INFERENCE_ENGINE_ERROR_NONE) {
342                         LOGE("Fail to Load.");
343                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
344                 }
345
346                 std::vector<std::vector<float> > input_tensors = { input_vec };
347
348                 // Do inference to the internal model.
349                 ret = _internal->run(input_tensors);
350                 if (ret != INFERENCE_ENGINE_ERROR_NONE) {
351                         LOGE("fail to inference internal model.");
352                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
353                 }
354
355                 // output layer size should be 1.
356                 TensorBuffer tensorBuffer = _internal->getOutputTensorBuffer();
357                 inference_engine_tensor_buffer *internal_output_buffer = tensorBuffer.getTensorBuffer(output_layers[0]);
358                 if (!internal_output_buffer) {
359                         LOGE("fail to get internal output tensor buffer.");
360                         return MEDIA_VISION_ERROR_INVALID_PARAMETER;
361                 }
362
363                 auto raw_buffer = static_cast<float *>(internal_output_buffer->buffer);
364
365                 _result.raw_data.clear();
366                 copy(raw_buffer, raw_buffer + internal_output_buffer->size / sizeof(float), back_inserter(_result.raw_data));
367                 _status = INFERENCED;
368
369                 return getAnswer();
370         } catch (const BaseException &e) {
371                 LOGE("%s", e.what());
372                 return e.getError();
373         }
374
375         return MEDIA_VISION_ERROR_INVALID_OPERATION;
376 }
377
378 int FaceRecognition::deleteLabel(string label_name)
379 {
380         if (_status < INITIALIZED) {
381                 LOGE("Initialization not ready yet.(%u)", _status);
382                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
383         }
384
385         // Deleting a given label is to remove existing registered person from label and feature vector files.
386
387         try {
388                 // Import label data from a label file.
389                 importLabel();
390
391                 if (_label_manager->isExist(label_name) == false) {
392                         LOGE("%s doesn't exist in label file.", label_name.c_str());
393                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
394                 }
395
396                 unsigned int target_label_idx = _label_manager->getLabelIndex(label_name);
397
398                 auto label_cnt_ori = _label_manager->getMaxLabel();
399
400                 // Get label count after removing a given label from the label file.
401                 _label_manager->removeLabel(label_name);
402
403                 auto label_cnt = _label_manager->getMaxLabel();
404                 unique_ptr<FeatureVectorManager> fvm = make_unique<FaceRecognitionFVM>(_config.feature_vector_file_path);
405                 unique_ptr<FeatureVectorManager> fvm_new =
406                                 make_unique<FaceRecognitionFVM>(_config.feature_vector_file_path + ".new");
407
408                 // Make sure feature vector file.
409                 checkFeatureVectorFile(fvm->getFileName(), fvm_new->getFileName());
410
411                 unique_ptr<DataSetManager> data_set = make_unique<FaceRecognitionDSM>();
412
413                 // feature vectors corresponding to given label aren't removed yet from feature vector file.
414                 // So label_cnt_ori is needed.
415                 data_set->loadDataSet(fvm->getFileName(), label_cnt_ori);
416
417                 vector<vector<float> > feature_vectors_old = data_set->getData();
418                 vector<unsigned int> label_idx_vectors_old = data_set->getLabelIdx();
419                 vector<vector<float> > feature_vectors_new;
420                 vector<unsigned int> label_idx_vectors_new;
421
422                 size_t data_set_cnt = 0;
423
424                 // Write existing feature vectors and its one-hot encoding table with updated label.
425                 for (unsigned int idx = 0; idx < feature_vectors_old.size(); ++idx) {
426                         // Except the data sets with a given target_label_idx.
427                         if (label_idx_vectors_old[idx] == target_label_idx)
428                                 continue;
429
430                         // One-hot encoding table should be updated.
431                         // Assume that below label file exists for example,
432                         //     In label file
433                         //     -------------
434                         //     offset 0 : label 1
435                         //     offset 1 : label 2
436                         //     offset 2 : label 3
437                         //
438                         //     One hot encoding table should be updated like below after removing label 1,
439                         //     In label file
440                         //     -------------
441                         //     offset 0 : label 2
442                         //     offset 1 : label 3
443                         //
444                         // So if the index of removed label less than remaining index then decrease each index.
445                         if (label_idx_vectors_old[idx] > target_label_idx)
446                                 label_idx_vectors_old[idx]--;
447
448                         feature_vectors_new.push_back(feature_vectors_old[idx]);
449                         label_idx_vectors_new.push_back(label_idx_vectors_old[idx]);
450                         data_set_cnt++;
451                 }
452
453                 // Retrain only in case that feature vectors exist.
454                 if (data_set_cnt > 0) {
455                         fvm_new->storeData(feature_vectors_new, label_idx_vectors_new);
456                         fvm_new->writeHeader(feature_vectors_new[0].size(), label_cnt, data_set_cnt);
457
458                         int ret = 0;
459
460                         if (FaceRecogUtil::isFileExist(fvm->getFileName())) {
461                                 // Change new data file to existing one.
462                                 ret = ::remove(fvm->getFileName().c_str());
463                                 if (ret)
464                                         throw InvalidOperation("Fail to remove feature vector file.");
465                         }
466
467                         ret = ::rename(fvm_new->getFileName().c_str(), fvm->getFileName().c_str());
468                         if (ret)
469                                 throw InvalidOperation("Fail to rename new feature vector file to original one.");
470
471                         _training_model->configureModel(label_cnt);
472                         unique_ptr<DataSetManager> new_data_set = make_unique<FaceRecognitionDSM>();
473                         new_data_set->clear();
474
475                         // TODO. Remove existing internal model file.
476
477                         new_data_set->loadDataSet(_config.feature_vector_file_path, label_cnt);
478                         _training_model->applyDataSet(new_data_set);
479                         _training_model->compile();
480                         _training_model->train();
481
482                         // TODO. apply feature vector priority policy here.
483                         // We can get weight trained from NNTrainer.
484                         // _training_model->getWeights(&weights, &size, "centroid_knn1");
485                 } else {
486                         _training_model->removeModel();
487                         fvm->remove();
488                         _label_manager->removeFile();
489
490                         LOGD("No training data so removed all relevant files.");
491                 }
492
493                 _status = DELETED;
494         } catch (const BaseException &e) {
495                 LOGE("%s", e.what());
496                 return e.getError();
497         }
498
499         return MEDIA_VISION_ERROR_NONE;
500 }
501
502 int FaceRecognition::getLabel(const char **out_label)
503 {
504         if (_status != INFERENCED) {
505                 LOGE("Inference not completed yet. (%d)", _status);
506                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
507         }
508
509         try {
510                 _label_manager->getLabelString(_result.label, _result.label_idx);
511         } catch (const BaseException &e) {
512                 LOGE("%s", e.what());
513                 return e.getError();
514         }
515
516         *out_label = _result.label.c_str();
517
518         return MEDIA_VISION_ERROR_NONE;
519 }
520
521 FaceRecognitionResult &FaceRecognition::result()
522 {
523         if (!_result.is_valid)
524                 throw NoData("Inference result not ready yet.");
525
526         importLabel();
527
528         if (!_label_manager)
529                 throw NoData("Label file doesn't exist.");
530
531         try {
532                 _label_manager->getLabelString(_result.label, _result.label_idx);
533         } catch (const BaseException &e) {
534                 LOGE("%s", e.what());
535                 throw e;
536         }
537
538         return _result;
539 }
540
541 } // machine_learning
542 } // mediavision