mv_machine_learning/face_recognition/src/face_recognition.cpp

   1 /**
   2  * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <string.h>
  18 #include <fstream>
  19 #include <istream>
  20 #include <tuple>
  21 #include <map>
  22 #include <algorithm>
  23
  24 #include <sys/stat.h>
  25
  26 #include <dlog.h>
  27 #include <mv_common.h>
  28 #include <mv_private.h>
  29
  30 #include "machine_learning_exception.h"
  31 #include "TensorBuffer.h"
  32 #include "face_recognition.h"
  33 #include "face_recognition_fvm.h"
  34 #include "face_recognition_dsm.h"
  35 #include "file_util.h"
  36
  37 using namespace std;
  38 using namespace mediavision::inference;
  39 using namespace TrainingEngineInterface::Common;
  40 using namespace mediavision::machine_learning::face_recognition::status;
  41 using namespace mediavision::machine_learning::exception;
  42
  43 namespace mediavision
  44 {
  45 namespace machine_learning
  46 {
  47 FaceRecognition::FaceRecognition()
  48                 : _status(NONE)
  49                 , _internal()
  50                 , _backbone()
  51                 , _backbone_model_info()
  52                 , _training_model()
  53                 , _label_manager()
  54                 , _config()
  55                 , _result()
  56 {}
  57
  58 FaceRecognition::~FaceRecognition()
  59 {
  60         if (_label_manager)
  61                 _label_manager->clear();
  62 }
  63
  64 void FaceRecognition::checkFeatureVectorFile(string fv_file_name, string new_fv_file_name)
  65 {
  66         // Change new feature vector file to existing one in case that current process is terminated just after removing existing feature vector file but
  67         // new feature vector file isn't changed to existing one yet.
  68         if (FaceRecogUtil::isFileExist(new_fv_file_name) && !FaceRecogUtil::isFileExist(fv_file_name)) {
  69                 int ret = ::rename(new_fv_file_name.c_str(), fv_file_name.c_str());
  70                 if (ret)
  71                         throw InvalidOperation("Fail to rename new feature vector file to original one.");
  72
  73                 return;
  74         }
  75
  76         // Make sure to remove a temp file in case that current process is terminated just after generating new feature vector file
  77         // which is not correct file but existing one isn't removed. In this case, existing file is used again.
  78         if (FaceRecogUtil::isFileExist(new_fv_file_name)) {
  79                 int ret = ::remove(new_fv_file_name.c_str());
  80                 if (ret)
  81                         throw InvalidOperation("Fail to remove new feature vector file.");
  82         }
  83 }
  84
  85 void FaceRecognition::storeDataSet(unique_ptr<DataSetManager> &data_set, unsigned int label_cnt)
  86 {
  87         try {
  88                 unique_ptr<FeatureVectorManager> fvm = make_unique<FaceRecognitionFVM>(_config.feature_vector_file_path);
  89                 unique_ptr<FeatureVectorManager> fvm_new =
  90                                 make_unique<FaceRecognitionFVM>(_config.feature_vector_file_path + ".new");
  91
  92                 // Make sure feature vector file.
  93                 checkFeatureVectorFile(fvm->getFileName(), fvm_new->getFileName());
  94
  95                 // 1. Write feature vector and it's label index.
  96                 fvm_new->storeData(data_set->getData(), data_set->getLabelIdx());
  97
  98                 // 2. Write feature vector header.
  99                 fvm_new->writeHeader(data_set->getFeaVecSize(), label_cnt, data_set->getData().size());
 100
 101                 int ret = 0;
 102
 103                 // 3. Change new data file to existing one.
 104                 if (FaceRecogUtil::isFileExist(fvm->getFileName())) {
 105                         ret = ::remove(fvm->getFileName().c_str());
 106                         if (ret)
 107                                 throw InvalidOperation("Fail to remove feature vector file.");
 108                 }
 109
 110                 ret = ::rename(fvm_new->getFileName().c_str(), fvm->getFileName().c_str());
 111                 if (ret)
 112                         throw InvalidOperation("Fail to rename new feature vector file to original one.");
 113         } catch (const BaseException &e) {
 114                 LOGE("%s", e.what());
 115                 throw e;
 116         }
 117 }
 118
 119 void FaceRecognition::setConfig(FaceRecognitionConfig &config)
 120 {
 121         _config = config;
 122 }
 123
 124 std::vector<model_layer_info> &FaceRecognition::getBackboneInputLayerInfo()
 125 {
 126         return _backbone_model_info->getInputLayerInfo();
 127 }
 128
 129 int FaceRecognition::getVecFromMvSource(mv_source_h img_src, std::vector<float> &out_vec)
 130 {
 131         mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID;
 132         unsigned int width = 0, height = 0, bufferSize = 0;
 133         unsigned char *buffer = NULL;
 134
 135         if (mv_source_get_width(img_src, &width) != MEDIA_VISION_ERROR_NONE ||
 136                 mv_source_get_height(img_src, &height) != MEDIA_VISION_ERROR_NONE ||
 137                 mv_source_get_colorspace(img_src, &colorspace) != MEDIA_VISION_ERROR_NONE ||
 138                 mv_source_get_buffer(img_src, &buffer, &bufferSize))
 139                 return MEDIA_VISION_ERROR_INVALID_PARAMETER;
 140
 141         // TODO. Let's support various color spaces.
 142
 143         if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) {
 144                 LOGE("Not Supported format!\n");
 145                 return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
 146         }
 147
 148         vector<model_layer_info> &input_layer_info = getBackboneInputLayerInfo();
 149         // TODO. consider multiple tensor info.
 150         size_t re_width = input_layer_info[0].tensor_info.shape[0];
 151         size_t re_height = input_layer_info[0].tensor_info.shape[1];
 152
 153         LOGD("Convert mv source(WxH) : %d x %d => %zu x %zu", width, height, re_width, re_height);
 154
 155         FeatureVectorManager::getVecFromRGB(buffer, out_vec, width, height, re_width, re_height);
 156
 157         return MEDIA_VISION_ERROR_NONE;
 158 }
 159
 160 int FaceRecognition::initialize()
 161 {
 162         _training_model = make_unique<SimpleShot>(_config.training_engine_backend_type, _config.training_target_device_type,
 163                                                                                           _config.input_tensor_shape, _config.internal_model_file_path);
 164
 165         _internal = make_unique<Inference>();
 166
 167         int ret = _internal->bind(_config.inference_engine_backend_type, _config.inference_target_device_type);
 168         if (ret != MEDIA_VISION_ERROR_NONE)
 169                 return ret;
 170
 171         _status = INITIALIZED;
 172
 173         return MEDIA_VISION_ERROR_NONE;
 174 }
 175
 176 void FaceRecognition::importLabel()
 177 {
 178         try {
 179                 // Prepare can be called several times after initialization is done so previous data should be dropped.
 180                 _label_manager.reset();
 181                 _label_manager = make_unique<LabelManager>(_config.label_file_path, _config.decision_threshold);
 182
 183                 // Update label manager from a given label file.
 184                 int cnt = _label_manager->importLabel();
 185
 186                 LOGD("%d labels have been imported", cnt);
 187         } catch (const BaseException &e) {
 188                 LOGE("%s", e.what());
 189                 throw e;
 190         }
 191 }
 192
 193 int FaceRecognition::registerNewFace(std::vector<float> &input_vec, string label_name)
 194 {
 195         if (_status < INITIALIZED) {
 196                 LOGE("Initialization not ready yet. (%u)", _status);
 197                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
 198         }
 199
 200         // TODO. consider data augmentation.
 201         try {
 202                 // Import label data from a label file.
 203                 importLabel();
 204
 205                 // 1. Store only label names to label file, which aren't duplicated.
 206                 bool duplicated = _label_manager->addLabelToMap(label_name, label_name);
 207                 if (!duplicated) {
 208                         int ret = _label_manager->addLabelToFile(label_name);
 209                         if (ret == 0)
 210                                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
 211                 }
 212
 213                 // Get label index and count.
 214                 unsigned int label_idx = _label_manager->getLabelIndex(label_name);
 215                 unsigned int label_cnt = _label_manager->getMaxLabel();
 216
 217                 _training_model->configureModel(label_cnt);
 218
 219                 unique_ptr<DataSetManager> data_set = make_unique<FaceRecognitionDSM>();
 220
 221                 data_set->clear();
 222
 223                 // Load existing feature vectors if the feature vector file exists.
 224                 if (FaceRecogUtil::isFileExist(_config.feature_vector_file_path) == true) {
 225                         LOGI("feature vector file already exists so it loads the file first.");
 226                         data_set->loadDataSet(_config.feature_vector_file_path, label_cnt);
 227                 }
 228
 229                 // Add new feature vector only in case that feature vector count of given label_idx is less then 5.
 230                 // It means that only 5 set of feature vector per a label is valid.
 231                 // TODO. According to feature vector priority, new feature vector should be added.
 232                 if (data_set->isFeatureVectorAllowed(label_idx))
 233                         data_set->addDataSet(input_vec, label_idx, label_cnt);
 234
 235                 _training_model->applyDataSet(data_set);
 236                 _training_model->compile();
 237                 _training_model->train();
 238
 239                 // TODO. apply feature vector priority policy here.
 240                 // We can get weight trained from training engine.
 241                 // _training_model->getWeights(&weights, &size, "centroid_knn1");
 242
 243                 // Store dataset to feature vector file.
 244                 storeDataSet(data_set, label_cnt);
 245
 246                 // label_cnt can be changed every time the training is performed and all data set will be used for the training
 247                 // again in this case. So make sure to clear previous data set before next training.
 248                 _training_model->clearDataSet(data_set);
 249                 _status = REGISTERED;
 250         } catch (const BaseException &e) {
 251                 LOGE("%s", e.what());
 252                 return e.getError();
 253         }
 254
 255         return MEDIA_VISION_ERROR_NONE;
 256 }
 257
 258 int FaceRecognition::getAnswer()
 259 {
 260         int answer_idx;
 261
 262         string result_str;
 263
 264         try {
 265                 for (auto &r : _result.raw_data)
 266                         result_str += to_string(r) + " ";
 267
 268                 LOGD("raw data = %s", result_str.c_str());
 269
 270                 answer_idx = max_element(_result.raw_data.begin(), _result.raw_data.end()) - _result.raw_data.begin();
 271
 272                 // Check decision threshold.
 273                 if (_result.raw_data[answer_idx] < _label_manager->getDecisionThreshold()) {
 274                         throw NoData("Not meet decision threshold.");
 275                 }
 276
 277                 float weighted = _result.raw_data[answer_idx] * _label_manager->getDecisionWeight();
 278
 279                 // Check decision weight threshold.
 280                 for (auto &r : _result.raw_data) {
 281                         if (_result.raw_data[answer_idx] == r)
 282                                 continue;
 283
 284                         if (weighted < r)
 285                                 throw NoData("Not meet decision weight threshold");
 286                 }
 287
 288                 _result.label_idx = answer_idx;
 289                 _result.is_valid = true;
 290         } catch (const BaseException &e) {
 291                 LOGE("%s", e.what());
 292                 return e.getError();
 293         }
 294
 295         return MEDIA_VISION_ERROR_NONE;
 296 }
 297
 298 int FaceRecognition::recognizeFace(std::vector<float> &input_vec)
 299 {
 300         if (_status < INITIALIZED) {
 301                 LOGE("Initialization not ready yet.(%u)", _status);
 302                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
 303         }
 304
 305         if (!FaceRecogUtil::isFileExist(_config.internal_model_file_path)) {
 306                 LOGE("Internal model file(%s) doesn't exist.", _config.internal_model_file_path.c_str());
 307                 return MEDIA_VISION_ERROR_INVALID_PATH;
 308         }
 309
 310         TrainingEngineBackendInfo engine_info = _training_model->getTrainingEngineInfo();
 311         vector<string> &input_layers = engine_info.input_layer_names;
 312         vector<inference_engine_tensor_info> &input_tensor_info = engine_info.input_tensor_info;
 313         vector<string> &output_layers = engine_info.output_layer_names;
 314         vector<inference_engine_tensor_info> &output_tensor_info = engine_info.output_tensor_info;
 315
 316         // Face Recognition has following steps
 317         // ------------------------------------
 318         // 1. Import label data to in-memory from a file.
 319         // 2. Load internal model.
 320         // 3. Do an inference with the internal model to get a label.
 321         // 4. Get the label best suitable to the output tensor and return the label.
 322
 323         try {
 324                 // Import label data from a label file.
 325                 importLabel();
 326
 327                 // Tensor order is NCHW.
 328                 size_t width = input_tensor_info[0].shape[0];
 329                 size_t height = input_tensor_info[0].shape[1];
 330                 size_t ch = input_tensor_info[0].shape[2];
 331
 332                 _internal->configureInputInfo(width, height, 1, ch, 1.0f, 0.0f, MV_INFERENCE_DATA_FLOAT32, input_layers);
 333                 // Output tensor size should be a number of labels so update it.
 334                 output_tensor_info[0].shape[0] = _label_manager->getMaxLabel();
 335                 _internal->configureOutputInfo(output_layers, output_tensor_info);
 336
 337                 _internal->configureModelFiles("", _config.internal_model_file_path, "");
 338
 339                 // Load the trained internal model.
 340                 int ret = _internal->load();
 341                 if (ret != INFERENCE_ENGINE_ERROR_NONE) {
 342                         LOGE("Fail to Load.");
 343                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
 344                 }
 345
 346                 std::vector<std::vector<float> > input_tensors = { input_vec };
 347
 348                 // Do inference to the internal model.
 349                 ret = _internal->run(input_tensors);
 350                 if (ret != INFERENCE_ENGINE_ERROR_NONE) {
 351                         LOGE("fail to inference internal model.");
 352                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
 353                 }
 354
 355                 // output layer size should be 1.
 356                 TensorBuffer tensorBuffer = _internal->getOutputTensorBuffer();
 357                 inference_engine_tensor_buffer *internal_output_buffer = tensorBuffer.getTensorBuffer(output_layers[0]);
 358                 if (!internal_output_buffer) {
 359                         LOGE("fail to get internal output tensor buffer.");
 360                         return MEDIA_VISION_ERROR_INVALID_PARAMETER;
 361                 }
 362
 363                 auto raw_buffer = static_cast<float *>(internal_output_buffer->buffer);
 364
 365                 _result.raw_data.clear();
 366                 copy(raw_buffer, raw_buffer + internal_output_buffer->size / sizeof(float), back_inserter(_result.raw_data));
 367                 _status = INFERENCED;
 368
 369                 return getAnswer();
 370         } catch (const BaseException &e) {
 371                 LOGE("%s", e.what());
 372                 return e.getError();
 373         }
 374
 375         return MEDIA_VISION_ERROR_INVALID_OPERATION;
 376 }
 377
 378 int FaceRecognition::deleteLabel(string label_name)
 379 {
 380         if (_status < INITIALIZED) {
 381                 LOGE("Initialization not ready yet.(%u)", _status);
 382                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
 383         }
 384
 385         // Deleting a given label is to remove existing registered person from label and feature vector files.
 386
 387         try {
 388                 // Import label data from a label file.
 389                 importLabel();
 390
 391                 if (_label_manager->isExist(label_name) == false) {
 392                         LOGE("%s doesn't exist in label file.", label_name.c_str());
 393                         return MEDIA_VISION_ERROR_INVALID_OPERATION;
 394                 }
 395
 396                 unsigned int target_label_idx = _label_manager->getLabelIndex(label_name);
 397
 398                 auto label_cnt_ori = _label_manager->getMaxLabel();
 399
 400                 // Get label count after removing a given label from the label file.
 401                 _label_manager->removeLabel(label_name);
 402
 403                 auto label_cnt = _label_manager->getMaxLabel();
 404                 unique_ptr<FeatureVectorManager> fvm = make_unique<FaceRecognitionFVM>(_config.feature_vector_file_path);
 405                 unique_ptr<FeatureVectorManager> fvm_new =
 406                                 make_unique<FaceRecognitionFVM>(_config.feature_vector_file_path + ".new");
 407
 408                 // Make sure feature vector file.
 409                 checkFeatureVectorFile(fvm->getFileName(), fvm_new->getFileName());
 410
 411                 unique_ptr<DataSetManager> data_set = make_unique<FaceRecognitionDSM>();
 412
 413                 // feature vectors corresponding to given label aren't removed yet from feature vector file.
 414                 // So label_cnt_ori is needed.
 415                 data_set->loadDataSet(fvm->getFileName(), label_cnt_ori);
 416
 417                 vector<vector<float> > feature_vectors_old = data_set->getData();
 418                 vector<unsigned int> label_idx_vectors_old = data_set->getLabelIdx();
 419                 vector<vector<float> > feature_vectors_new;
 420                 vector<unsigned int> label_idx_vectors_new;
 421
 422                 size_t data_set_cnt = 0;
 423
 424                 // Write existing feature vectors and its one-hot encoding table with updated label.
 425                 for (unsigned int idx = 0; idx < feature_vectors_old.size(); ++idx) {
 426                         // Except the data sets with a given target_label_idx.
 427                         if (label_idx_vectors_old[idx] == target_label_idx)
 428                                 continue;
 429
 430                         // One-hot encoding table should be updated.
 431                         // Assume that below label file exists for example,
 432                         //     In label file
 433                         //     -------------
 434                         //     offset 0 : label 1
 435                         //     offset 1 : label 2
 436                         //     offset 2 : label 3
 437                         //
 438                         //     One hot encoding table should be updated like below after removing label 1,
 439                         //     In label file
 440                         //     -------------
 441                         //     offset 0 : label 2
 442                         //     offset 1 : label 3
 443                         //
 444                         // So if the index of removed label less than remaining index then decrease each index.
 445                         if (label_idx_vectors_old[idx] > target_label_idx)
 446                                 label_idx_vectors_old[idx]--;
 447
 448                         feature_vectors_new.push_back(feature_vectors_old[idx]);
 449                         label_idx_vectors_new.push_back(label_idx_vectors_old[idx]);
 450                         data_set_cnt++;
 451                 }
 452
 453                 // Retrain only in case that feature vectors exist.
 454                 if (data_set_cnt > 0) {
 455                         fvm_new->storeData(feature_vectors_new, label_idx_vectors_new);
 456                         fvm_new->writeHeader(feature_vectors_new[0].size(), label_cnt, data_set_cnt);
 457
 458                         int ret = 0;
 459
 460                         if (FaceRecogUtil::isFileExist(fvm->getFileName())) {
 461                                 // Change new data file to existing one.
 462                                 ret = ::remove(fvm->getFileName().c_str());
 463                                 if (ret)
 464                                         throw InvalidOperation("Fail to remove feature vector file.");
 465                         }
 466
 467                         ret = ::rename(fvm_new->getFileName().c_str(), fvm->getFileName().c_str());
 468                         if (ret)
 469                                 throw InvalidOperation("Fail to rename new feature vector file to original one.");
 470
 471                         _training_model->configureModel(label_cnt);
 472                         unique_ptr<DataSetManager> new_data_set = make_unique<FaceRecognitionDSM>();
 473                         new_data_set->clear();
 474
 475                         // TODO. Remove existing internal model file.
 476
 477                         new_data_set->loadDataSet(_config.feature_vector_file_path, label_cnt);
 478                         _training_model->applyDataSet(new_data_set);
 479                         _training_model->compile();
 480                         _training_model->train();
 481
 482                         // TODO. apply feature vector priority policy here.
 483                         // We can get weight trained from NNTrainer.
 484                         // _training_model->getWeights(&weights, &size, "centroid_knn1");
 485                 } else {
 486                         _training_model->removeModel();
 487                         fvm->remove();
 488                         _label_manager->removeFile();
 489
 490                         LOGD("No training data so removed all relevant files.");
 491                 }
 492
 493                 _status = DELETED;
 494         } catch (const BaseException &e) {
 495                 LOGE("%s", e.what());
 496                 return e.getError();
 497         }
 498
 499         return MEDIA_VISION_ERROR_NONE;
 500 }
 501
 502 int FaceRecognition::getLabel(const char **out_label)
 503 {
 504         if (_status != INFERENCED) {
 505                 LOGE("Inference not completed yet. (%d)", _status);
 506                 return MEDIA_VISION_ERROR_INVALID_OPERATION;
 507         }
 508
 509         try {
 510                 _label_manager->getLabelString(_result.label, _result.label_idx);
 511         } catch (const BaseException &e) {
 512                 LOGE("%s", e.what());
 513                 return e.getError();
 514         }
 515
 516         *out_label = _result.label.c_str();
 517
 518         return MEDIA_VISION_ERROR_NONE;
 519 }
 520
 521 FaceRecognitionResult &FaceRecognition::result()
 522 {
 523         if (!_result.is_valid)
 524                 throw NoData("Inference result not ready yet.");
 525
 526         importLabel();
 527
 528         if (!_label_manager)
 529                 throw NoData("Label file doesn't exist.");
 530
 531         try {
 532                 _label_manager->getLabelString(_result.label, _result.label_idx);
 533         } catch (const BaseException &e) {
 534                 LOGE("%s", e.what());
 535                 throw e;
 536         }
 537
 538         return _result;
 539 }
 540
 541 } // machine_learning
 542 } // mediavision