2 * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
27 #include <mv_common.h>
28 #include <mv_private.h>
30 #include "machine_learning_exception.h"
31 #include "TensorBuffer.h"
32 #include "face_recognition.h"
33 #include "face_recognition_fvm.h"
34 #include "face_recognition_dsm.h"
35 #include "file_util.h"
38 using namespace mediavision::inference;
39 using namespace TrainingEngineInterface::Common;
40 using namespace mediavision::machine_learning::face_recognition::status;
41 using namespace mediavision::machine_learning::exception;
45 namespace machine_learning
// Constructor. The member-initializer list visible here value-initializes _backbone_model_info.
// NOTE(review): the remaining member initializers and the constructor body are not visible in
// this view — confirm every member is initialized.
47 FaceRecognition::FaceRecognition()
51 , _backbone_model_info()
// Destructor. Calls clear() on the label manager.
// NOTE(review): _label_manager is only created in importLabel(); any null check guarding the
// call below is not visible in this view — confirm the pointer cannot be empty here.
58 FaceRecognition::~FaceRecognition()
61 _label_manager->clear();
64 void FaceRecognition::checkFeatureVectorFile(string fv_file_name, string new_fv_file_name)
66 // Change new feature vector file to existing one in case that current process is terminated just after removing existing feature vector file but
67 // new feature vector file isn't changed to existing one yet.
68 if (FaceRecogUtil::isFileExist(new_fv_file_name) && !FaceRecogUtil::isFileExist(fv_file_name)) {
69 int ret = ::rename(new_fv_file_name.c_str(), fv_file_name.c_str());
71 throw InvalidOperation("Fail to rename new feature vector file to original one.");
76 // Make sure to remove a temp file in case that current process is terminated just after generating new feature vector file
77 // which is not correct file but existing one isn't removed. In this case, existing file is used again.
78 if (FaceRecogUtil::isFileExist(new_fv_file_name)) {
79 int ret = ::remove(new_fv_file_name.c_str());
81 throw InvalidOperation("Fail to remove new feature vector file.");
// Persists the given data set to the feature vector file.
//
// The data is written to "<feature_vector_file_path>.new" first (feature vectors with their label
// indices, then the header). After that the existing feature vector file is removed and the new
// file is renamed to the original name. A failing remove or rename throws InvalidOperation.
//
// data_set  : provides the feature vectors, their label indices and the feature vector size.
// label_cnt : label count written to the feature vector header.
//
// NOTE(review): BaseException is caught and logged below; what the handler does after logging
// (e.g. rethrow) is not visible in this view — confirm.
85 void FaceRecognition::storeDataSet(unique_ptr<DataSetManager> &data_set, unsigned int label_cnt)
88 unique_ptr<FeatureVectorManager> fvm = make_unique<FaceRecognitionFVM>(_config.feature_vector_file_path);
89 unique_ptr<FeatureVectorManager> fvm_new =
90 make_unique<FaceRecognitionFVM>(_config.feature_vector_file_path + ".new");
92 // Make sure the feature vector files left by a previous run are in a consistent state.
93 checkFeatureVectorFile(fvm->getFileName(), fvm_new->getFileName());
95 // 1. Write feature vectors and their label indices.
96 fvm_new->storeData(data_set->getData(), data_set->getLabelIdx());
98 // 2. Write feature vector header.
99 fvm_new->writeHeader(data_set->getFeaVecSize(), label_cnt, data_set->getData().size());
103 // 3. Change new data file to existing one.
// NOTE(review): POSIX rename() replaces an existing destination atomically, so the separate
// remove() opens a window in which no feature vector file exists (checkFeatureVectorFile()
// recovers from it on the next call) — confirm whether the remove() is still required.
104 if (FaceRecogUtil::isFileExist(fvm->getFileName())) {
105 ret = ::remove(fvm->getFileName().c_str());
107 throw InvalidOperation("Fail to remove feature vector file.");
110 ret = ::rename(fvm_new->getFileName().c_str(), fvm->getFileName().c_str());
112 throw InvalidOperation("Fail to rename new feature vector file to original one.");
113 } catch (const BaseException &e) {
114 LOGE("%s", e.what());
// Takes a FaceRecognitionConfig by reference.
// NOTE(review): the body is not visible in this view — presumably it stores `config` in _config,
// which the other member functions read; confirm.
119 void FaceRecognition::setConfig(FaceRecognitionConfig &config)
124 std::vector<model_layer_info> &FaceRecognition::getBackboneInputLayerInfo()
126 return _backbone_model_info->getInputLayerInfo();
129 int FaceRecognition::getVecFromMvSource(mv_source_h img_src, std::vector<float> &out_vec)
131 mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID;
132 unsigned int width = 0, height = 0, bufferSize = 0;
133 unsigned char *buffer = NULL;
135 if (mv_source_get_width(img_src, &width) != MEDIA_VISION_ERROR_NONE ||
136 mv_source_get_height(img_src, &height) != MEDIA_VISION_ERROR_NONE ||
137 mv_source_get_colorspace(img_src, &colorspace) != MEDIA_VISION_ERROR_NONE ||
138 mv_source_get_buffer(img_src, &buffer, &bufferSize))
139 return MEDIA_VISION_ERROR_INVALID_PARAMETER;
141 // TODO. Let's support various color spaces.
143 if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) {
144 LOGE("Not Supported format!\n");
145 return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
148 vector<model_layer_info> &input_layer_info = getBackboneInputLayerInfo();
149 // TODO. consider multiple tensor info.
150 size_t re_width = input_layer_info[0].tensor_info.shape[0];
151 size_t re_height = input_layer_info[0].tensor_info.shape[1];
153 LOGD("Convert mv source(WxH) : %d x %d => %zu x %zu", width, height, re_width, re_height);
155 FeatureVectorManager::getVecFromRGB(buffer, out_vec, width, height, re_width, re_height);
157 return MEDIA_VISION_ERROR_NONE;
160 int FaceRecognition::initialize()
162 _training_model = make_unique<SimpleShot>(_config.training_engine_backend_type, _config.training_target_device_type,
163 _config.input_tensor_shape, _config.internal_model_file_path);
165 _internal = make_unique<Inference>();
167 int ret = _internal->bind(_config.inference_engine_backend_type, _config.inference_target_device_type);
168 if (ret != MEDIA_VISION_ERROR_NONE)
171 _status = INITIALIZED;
173 return MEDIA_VISION_ERROR_NONE;
// Recreates _label_manager from the configured label file path and decision threshold, imports
// the labels from the label file, and logs how many labels were imported.
//
// NOTE(review): BaseException is caught and logged below; what the handler does after logging
// (e.g. rethrow) is not visible in this view — confirm.
176 void FaceRecognition::importLabel()
179 // Prepare can be called several times after initialization is done so previous data should be dropped.
180 _label_manager.reset();
181 _label_manager = make_unique<LabelManager>(_config.label_file_path, _config.decision_threshold);
183 // Update label manager from a given label file.
184 int cnt = _label_manager->importLabel();
186 LOGD("%d labels have been imported", cnt);
187 } catch (const BaseException &e) {
188 LOGE("%s", e.what());
// Registers a feature vector under the given label and retrains the training model.
//
// Visible flow: the label is added to the label map and label file, the training model is
// configured for the current label count, any existing feature vector file is loaded, the new
// feature vector is added if the data set allows it for that label, the model is trained, and the
// data set is stored back to the feature vector file. On completion _status becomes REGISTERED.
//
// input_vec  : feature vector to add to the data set.
// label_name : label the feature vector belongs to.
//
// Returns MEDIA_VISION_ERROR_NONE on success and MEDIA_VISION_ERROR_INVALID_OPERATION when called
// before initialization.
// NOTE(review): the conditions guarding the second MEDIA_VISION_ERROR_INVALID_OPERATION return
// and the value returned from the BaseException handler are not visible in this view — confirm.
193 int FaceRecognition::registerNewFace(std::vector<float> &input_vec, string label_name)
195 if (_status < INITIALIZED) {
196 LOGE("Initialization not ready yet. (%u)", _status);
197 return MEDIA_VISION_ERROR_INVALID_OPERATION;
200 // TODO. consider data augmentation.
202 // Import label data from a label file.
205 // 1. Store only label names to label file, which aren't duplicated.
206 bool duplicated = _label_manager->addLabelToMap(label_name, label_name);
208 int ret = _label_manager->addLabelToFile(label_name);
210 return MEDIA_VISION_ERROR_INVALID_OPERATION;
213 // Get label index and count.
214 unsigned int label_idx = _label_manager->getLabelIndex(label_name);
215 unsigned int label_cnt = _label_manager->getMaxLabel();
217 _training_model->configureModel(label_cnt);
219 unique_ptr<DataSetManager> data_set = make_unique<FaceRecognitionDSM>();
223 // Load existing feature vectors if the feature vector file exists.
224 if (FaceRecogUtil::isFileExist(_config.feature_vector_file_path) == true) {
225 LOGI("feature vector file already exists so it loads the file first.");
226 data_set->loadDataSet(_config.feature_vector_file_path, label_cnt);
229 // Add new feature vector only in case that feature vector count of given label_idx is less than 5.
230 // It means that only 5 sets of feature vectors per label are valid.
231 // TODO. According to feature vector priority, new feature vector should be added.
232 if (data_set->isFeatureVectorAllowed(label_idx))
233 data_set->addDataSet(input_vec, label_idx, label_cnt);
235 _training_model->applyDataSet(data_set);
236 _training_model->compile();
237 _training_model->train();
239 // TODO. apply feature vector priority policy here.
240 // We can get weight trained from training engine.
241 // _training_model->getWeights(&weights, &size, "centroid_knn1");
243 // Store dataset to feature vector file.
244 storeDataSet(data_set, label_cnt);
246 // label_cnt can be changed every time the training is performed and all data set will be used for the training
247 // again in this case. So make sure to clear previous data set before next training.
248 _training_model->clearDataSet(data_set);
249 _status = REGISTERED;
250 } catch (const BaseException &e) {
251 LOGE("%s", e.what());
255 return MEDIA_VISION_ERROR_NONE;
// Picks the answer label from the raw inference output stored in _result.raw_data.
//
// The index of the largest raw value is taken as the answer. NoData is thrown when that value is
// below the label manager's decision threshold, and also from the decision weight check further
// down. On success _result.label_idx is set and _result.is_valid becomes true.
//
// Returns MEDIA_VISION_ERROR_NONE on success.
// NOTE(review): the value returned from the BaseException handler is not visible in this view.
258 int FaceRecognition::getAnswer()
// Dump every raw score to the debug log.
265 for (auto &r : _result.raw_data)
266 result_str += to_string(r) + " ";
268 LOGD("raw data = %s", result_str.c_str());
// NOTE(review): with an empty raw_data, max_element() returns end(), the index becomes 0 and the
// element access below is out of range — confirm raw_data is never empty here.
270 answer_idx = max_element(_result.raw_data.begin(), _result.raw_data.end()) - _result.raw_data.begin();
272 // Check decision threshold.
273 if (_result.raw_data[answer_idx] < _label_manager->getDecisionThreshold()) {
274 throw NoData("Not meet decision threshold.");
// Best score scaled by the decision weight; used by the weight threshold check below.
277 float weighted = _result.raw_data[answer_idx] * _label_manager->getDecisionWeight();
279 // Check decision weight threshold.
// NOTE(review): the statements between the comparison and the throw are not visible in this
// view — presumably values equal to the best score are skipped and `weighted` is compared
// against each remaining value; confirm.
280 for (auto &r : _result.raw_data) {
281 if (_result.raw_data[answer_idx] == r)
285 throw NoData("Not meet decision weight threshold");
288 _result.label_idx = answer_idx;
289 _result.is_valid = true;
290 } catch (const BaseException &e) {
291 LOGE("%s", e.what());
295 return MEDIA_VISION_ERROR_NONE;
// Runs the trained internal model on the given feature vector and stores the raw output in
// _result.raw_data. On completion _status becomes INFERENCED.
//
// input_vec : feature vector passed to the internal model as its only input tensor.
//
// Visible error returns:
//   MEDIA_VISION_ERROR_INVALID_OPERATION - called before initialization, or load()/run() failed.
//   MEDIA_VISION_ERROR_INVALID_PATH      - the internal model file does not exist.
//   MEDIA_VISION_ERROR_INVALID_PARAMETER - the output tensor buffer could not be obtained.
// NOTE(review): the success return and the value returned from the BaseException handler are not
// visible in this view — confirm.
298 int FaceRecognition::recognizeFace(std::vector<float> &input_vec)
300 if (_status < INITIALIZED) {
301 LOGE("Initialization not ready yet.(%u)", _status);
302 return MEDIA_VISION_ERROR_INVALID_OPERATION;
305 if (!FaceRecogUtil::isFileExist(_config.internal_model_file_path)) {
306 LOGE("Internal model file(%s) doesn't exist.", _config.internal_model_file_path.c_str());
307 return MEDIA_VISION_ERROR_INVALID_PATH;
// engine_info is a local copy; the references below refer to that copy, so the shape update
// further down does not modify the training model's own information.
310 TrainingEngineBackendInfo engine_info = _training_model->getTrainingEngineInfo();
311 vector<string> &input_layers = engine_info.input_layer_names;
312 vector<inference_engine_tensor_info> &input_tensor_info = engine_info.input_tensor_info;
313 vector<string> &output_layers = engine_info.output_layer_names;
314 vector<inference_engine_tensor_info> &output_tensor_info = engine_info.output_tensor_info;
316 // Face Recognition has following steps
317 // ------------------------------------
318 // 1. Import label data to in-memory from a file.
319 // 2. Load internal model.
320 // 3. Do an inference with the internal model to get a label.
321 // 4. Get the label best suitable to the output tensor and return the label.
324 // Import label data from a label file.
// NOTE(review): the comment below says NCHW, but width is read from shape[0], height from
// shape[1] and channel from shape[2] — confirm the actual tensor layout.
327 // Tensor order is NCHW.
328 size_t width = input_tensor_info[0].shape[0];
329 size_t height = input_tensor_info[0].shape[1];
330 size_t ch = input_tensor_info[0].shape[2];
332 _internal->configureInputInfo(width, height, 1, ch, 1.0f, 0.0f, MV_INFERENCE_DATA_FLOAT32, input_layers);
333 // Output tensor size should be a number of labels so update it.
334 output_tensor_info[0].shape[0] = _label_manager->getMaxLabel();
335 _internal->configureOutputInfo(output_layers, output_tensor_info);
337 _internal->configureModelFiles("", _config.internal_model_file_path, "");
339 // Load the trained internal model.
// NOTE(review): initialize() compares the result of bind() against MEDIA_VISION_ERROR_NONE while
// load() and run() are compared against INFERENCE_ENGINE_ERROR_NONE — confirm which error domain
// these calls return.
340 int ret = _internal->load();
341 if (ret != INFERENCE_ENGINE_ERROR_NONE) {
342 LOGE("Fail to Load.");
343 return MEDIA_VISION_ERROR_INVALID_OPERATION;
346 std::vector<std::vector<float> > input_tensors = { input_vec };
348 // Do inference to the internal model.
349 ret = _internal->run(input_tensors);
350 if (ret != INFERENCE_ENGINE_ERROR_NONE) {
351 LOGE("fail to inference internal model.");
352 return MEDIA_VISION_ERROR_INVALID_OPERATION;
355 // output layer size should be 1.
356 TensorBuffer tensorBuffer = _internal->getOutputTensorBuffer();
357 inference_engine_tensor_buffer *internal_output_buffer = tensorBuffer.getTensorBuffer(output_layers[0]);
358 if (!internal_output_buffer) {
359 LOGE("fail to get internal output tensor buffer.");
360 return MEDIA_VISION_ERROR_INVALID_PARAMETER;
// The output buffer is read as an array of float; its size is used as a byte count.
363 auto raw_buffer = static_cast<float *>(internal_output_buffer->buffer);
365 _result.raw_data.clear();
366 copy(raw_buffer, raw_buffer + internal_output_buffer->size / sizeof(float), back_inserter(_result.raw_data));
367 _status = INFERENCED;
370 } catch (const BaseException &e) {
371 LOGE("%s", e.what());
375 return MEDIA_VISION_ERROR_INVALID_OPERATION;
// Removes a registered label together with its feature vectors, then retrains the model.
//
// The label is removed from the label manager, the feature vector file is rewritten without the
// vectors of that label (label indices above the removed one are decreased by one), and the
// training model is retrained with the remaining data. When no feature vectors remain, the model
// and the label file are removed instead.
//
// label_name : label to delete.
//
// Returns MEDIA_VISION_ERROR_NONE on success and MEDIA_VISION_ERROR_INVALID_OPERATION when called
// before initialization or when the label does not exist.
// NOTE(review): the value returned from the BaseException handler is not visible in this view.
// NOTE(review): the label is removed from the label manager before the feature vector file is
// rewritten — confirm what state is left behind if a later step throws.
378 int FaceRecognition::deleteLabel(string label_name)
380 if (_status < INITIALIZED) {
381 LOGE("Initialization not ready yet.(%u)", _status);
382 return MEDIA_VISION_ERROR_INVALID_OPERATION;
385 // Deleting a given label is to remove existing registered person from label and feature vector files.
388 // Import label data from a label file.
391 if (_label_manager->isExist(label_name) == false) {
392 LOGE("%s doesn't exist in label file.", label_name.c_str());
393 return MEDIA_VISION_ERROR_INVALID_OPERATION;
396 unsigned int target_label_idx = _label_manager->getLabelIndex(label_name);
// Label count before the removal; needed to load the not-yet-updated feature vector file.
398 auto label_cnt_ori = _label_manager->getMaxLabel();
400 // Get label count after removing a given label from the label file.
401 _label_manager->removeLabel(label_name);
403 auto label_cnt = _label_manager->getMaxLabel();
404 unique_ptr<FeatureVectorManager> fvm = make_unique<FaceRecognitionFVM>(_config.feature_vector_file_path);
405 unique_ptr<FeatureVectorManager> fvm_new =
406 make_unique<FaceRecognitionFVM>(_config.feature_vector_file_path + ".new");
408 // Make sure the feature vector files left by a previous run are in a consistent state.
409 checkFeatureVectorFile(fvm->getFileName(), fvm_new->getFileName());
411 unique_ptr<DataSetManager> data_set = make_unique<FaceRecognitionDSM>();
413 // feature vectors corresponding to given label aren't removed yet from feature vector file.
414 // So label_cnt_ori is needed.
415 data_set->loadDataSet(fvm->getFileName(), label_cnt_ori);
417 vector<vector<float> > feature_vectors_old = data_set->getData();
418 vector<unsigned int> label_idx_vectors_old = data_set->getLabelIdx();
419 vector<vector<float> > feature_vectors_new;
420 vector<unsigned int> label_idx_vectors_new;
// NOTE(review): the statement that increments data_set_cnt is not visible in this view —
// presumably it counts the feature vectors kept by the loop below; confirm.
422 size_t data_set_cnt = 0;
424 // Write existing feature vectors and its one-hot encoding table with updated label.
425 for (unsigned int idx = 0; idx < feature_vectors_old.size(); ++idx) {
426 // Except the data sets with a given target_label_idx.
427 if (label_idx_vectors_old[idx] == target_label_idx)
430 // One-hot encoding table should be updated.
431 // Assume that below label file exists for example,
434 // offset 0 : label 1
435 // offset 1 : label 2
436 // offset 2 : label 3
438 // One hot encoding table should be updated like below after removing label 1,
441 // offset 0 : label 2
442 // offset 1 : label 3
444 // So if the index of the removed label is less than a remaining index then decrease that index.
445 if (label_idx_vectors_old[idx] > target_label_idx)
446 label_idx_vectors_old[idx]--;
448 feature_vectors_new.push_back(feature_vectors_old[idx]);
449 label_idx_vectors_new.push_back(label_idx_vectors_old[idx]);
453 // Retrain only in case that feature vectors exist.
454 if (data_set_cnt > 0) {
455 fvm_new->storeData(feature_vectors_new, label_idx_vectors_new);
// feature_vectors_new[0] is read here; this relies on data_set_cnt > 0 implying a non-empty vector.
456 fvm_new->writeHeader(feature_vectors_new[0].size(), label_cnt, data_set_cnt);
460 if (FaceRecogUtil::isFileExist(fvm->getFileName())) {
461 // Change new data file to existing one.
462 ret = ::remove(fvm->getFileName().c_str());
464 throw InvalidOperation("Fail to remove feature vector file.");
467 ret = ::rename(fvm_new->getFileName().c_str(), fvm->getFileName().c_str());
469 throw InvalidOperation("Fail to rename new feature vector file to original one.");
471 _training_model->configureModel(label_cnt);
472 unique_ptr<DataSetManager> new_data_set = make_unique<FaceRecognitionDSM>();
473 new_data_set->clear();
475 // TODO. Remove existing internal model file.
477 new_data_set->loadDataSet(_config.feature_vector_file_path, label_cnt);
478 _training_model->applyDataSet(new_data_set);
479 _training_model->compile();
480 _training_model->train();
482 // TODO. apply feature vector priority policy here.
483 // We can get weight trained from NNTrainer.
484 // _training_model->getWeights(&weights, &size, "centroid_knn1");
// No feature vectors remain: drop the trained model and the label file.
486 _training_model->removeModel();
488 _label_manager->removeFile();
490 LOGD("No training data so removed all relevant files.");
494 } catch (const BaseException &e) {
495 LOGE("%s", e.what());
499 return MEDIA_VISION_ERROR_NONE;
// Provides the label string of the last inference result.
//
// out_label : receives a pointer to the characters of _result.label. The pointer refers to
//             storage owned by _result.label, so it is only usable while that string is
//             neither modified nor destroyed.
//
// Returns MEDIA_VISION_ERROR_NONE on success and MEDIA_VISION_ERROR_INVALID_OPERATION when
// _status is not INFERENCED.
// NOTE(review): the value returned from the BaseException handler is not visible in this view.
// NOTE(review): out_label is dereferenced without a null check — confirm callers validate it.
502 int FaceRecognition::getLabel(const char **out_label)
504 if (_status != INFERENCED) {
505 LOGE("Inference not completed yet. (%d)", _status);
506 return MEDIA_VISION_ERROR_INVALID_OPERATION;
510 _label_manager->getLabelString(_result.label, _result.label_idx);
511 } catch (const BaseException &e) {
512 LOGE("%s", e.what());
516 *out_label = _result.label.c_str();
518 return MEDIA_VISION_ERROR_NONE;
// Returns the recognition result after filling in its label string.
//
// Throws NoData when the inference result is not valid yet, and NoData("Label file doesn't
// exist.") under a condition that is not visible in this view.
// NOTE(review): BaseException from getLabelString() is caught and logged below; what the handler
// does after logging and the return statement itself are not visible in this view — confirm.
521 FaceRecognitionResult &FaceRecognition::result()
523 if (!_result.is_valid)
524 throw NoData("Inference result not ready yet.");
529 throw NoData("Label file doesn't exist.");
532 _label_manager->getLabelString(_result.label, _result.label_idx);
533 } catch (const BaseException &e) {
534 LOGE("%s", e.what());
541 } // machine_learning