throw InvalidParameter("Invalid training engine backend type.");
}
-void FaceRecognition::UpdateDataSet(unique_ptr<DataSetManager> &data_set, vector<float> &feature_vec,
- const int label_idx, const int label_cnt)
+void FaceRecognition::StoreDataSet(unique_ptr<DataSetManager> &data_set, unsigned int label_cnt)
{
- size_t data_set_cnt = 0;
-
try {
auto fvm = CreateFVM(_config.training_engine_backend_type, _config.feature_vector_file_path);
auto fvm_new = CreateFVM(_config.training_engine_backend_type, _config.feature_vector_file_path + ".new");
// Make sure feature vector file.
CheckFeatureVectorFile(fvm, fvm_new);
- data_set = CreateDSM(_config.training_engine_backend_type);
-
- // 1. If data set file exists then load the file to DataSetManager object first
- // and then write them to the data set file with updated label value, and then
- // write a new dataset to the data set file.
- // Otherwise, it writes only new data set to the data set file.
- if (FaceRecogUtil::IsFileExist(fvm->GetFileName())) {
- data_set->LoadDataSet(fvm->GetFileName());
-
- vector<vector<float> > feature_vectors = data_set->GetData();
- vector<unsigned int> label_idx_vectors = data_set->GetLabelIdx();
-
- // 1) Write existing feature vectors and its one-hot encoding table considered
- // for new label count to the data set file.
- for (unsigned int idx = 0; idx < feature_vectors.size(); ++idx)
- fvm_new->WriteFeatureVec(feature_vectors[idx], label_cnt, label_idx_vectors[idx]);
-
- data_set_cnt += feature_vectors.size();
-
- // 2) If same feature vector isn't duplicated then write the feature vector to data set file.
- if (!data_set->IsFeatureVectorDuplicated(feature_vec)) {
- fvm_new->WriteFeatureVec(feature_vec, label_cnt, label_idx);
- LOGD("Added a new feature vector to data set file.");
- data_set_cnt++;
- }
- } else {
- // 1) Write only a new data set to the data st file.
- fvm_new->WriteFeatureVec(feature_vec, label_cnt, label_idx);
- LOGD("Added a new feature vector to data set file.");
- data_set_cnt++;
- }
+ // 1. Write the feature vector and its label index.
+ fvm_new->StoreData(data_set->GetData(), data_set->GetLabelIdx());
// 2. Write feature vector header.
- fvm_new->WriteHeader(feature_vec.size(), label_cnt, data_set_cnt);
+ fvm_new->WriteHeader(data_set->GetFeaVecSize(), label_cnt, data_set->GetData().size());
int ret = 0;
ret = ::rename(fvm_new->GetFileName().c_str(), fvm->GetFileName().c_str());
if (ret)
throw InvalidOperation("Fail to rename new feature vector file to original one.");
-
- data_set->Clear();
- data_set->LoadDataSet(fvm->GetFileName());
} catch (const BaseException &e) {
LOGE("%s", e.what());
throw e;
}
}
-void FaceRecognition::UpdateDataSet(unique_ptr<DataSetManager> &data_set)
-{
- try {
- data_set = CreateDSM(_config.training_engine_backend_type);
-
- auto fvm = CreateFVM(_config.training_engine_backend_type, _config.feature_vector_file_path);
-
- if (FaceRecogUtil::IsFileExist(fvm->GetFileName()) == false)
- throw InvalidOperation("Feature vector file not found.");
-
- data_set->LoadDataSet(fvm->GetFileName());
- } catch (const BaseException &e) {
- LOGE("%s", e.what());
- throw e;
- }
-}
void FaceRecognition::SetConfig(FaceRecognitionConfig &config)
{
copy(buffer, buffer + backbone_output_buffer->size / sizeof(float), back_inserter(feature_vec));
// Get label index and count.
- int label_idx = _label_manager->GetLabelIndex(label_name);
- int label_cnt = _label_manager->GetMaxLabel();
+ unsigned int label_idx = _label_manager->GetLabelIndex(label_name);
+ unsigned int label_cnt = _label_manager->GetMaxLabel();
_training_model->ConfigureModel(label_cnt);
- unique_ptr<DataSetManager> data_set;
+ unique_ptr<DataSetManager> data_set = CreateDSM(_config.training_engine_backend_type);
+
+ data_set->Clear();
+
+ // Load existing feature vectors if the feature vector file exists.
+ if (FaceRecogUtil::IsFileExist(_config.feature_vector_file_path) == true) {
+ LOGI("feature vector file already exists so it loads the file first.");
+ data_set->LoadDataSet(_config.feature_vector_file_path, label_cnt);
+ }
+
+ // Add new feature vectors.
+ data_set->AddDataSet(feature_vec, label_idx, label_cnt);
- UpdateDataSet(data_set, feature_vec, label_idx, label_cnt);
_training_model->ApplyDataSet(data_set);
_training_model->Compile();
_training_model->Train();
+ // TODO. apply feature vector priority policy here.
+ // We can get weight trained from NNTrainer.
+ // _training_model->getWeights(&weights, &size, "centroid_knn1");
+
+ // Store dataset to feature vector file.
+ StoreDataSet(data_set, label_cnt);
+
// label_cnt can be changed every time the training is performed and all data set will be used for the training
// again in this case. So make sure to clear previous data set before next training.
_training_model->ClearDataSet(data_set);
_result.raw_data.clear();
copy(raw_buffer, raw_buffer + internal_output_buffer->size / sizeof(float), back_inserter(_result.raw_data));
-
_status = INFERENCED;
return GetAnswer();
unsigned int target_label_idx = _label_manager->GetLabelIndex(label_name);
+ auto label_cnt_ori = _label_manager->GetMaxLabel();
+
// Get label count after removing a given label from the label file.
_label_manager->RemoveLabel(label_name);
auto data_set = CreateDSM(_config.training_engine_backend_type);
- data_set->LoadDataSet(fvm->GetFileName());
+ // Feature vectors corresponding to the given label haven't been removed from the
+ // feature vector file yet, so label_cnt_ori is needed to load them.
+ data_set->LoadDataSet(fvm->GetFileName(), label_cnt_ori);
- vector<vector<float> > feature_vectors = data_set->GetData();
- vector<unsigned int> label_idx_vectors = data_set->GetLabelIdx();
+ vector<vector<float> > feature_vectors_old = data_set->GetData();
+ vector<unsigned int> label_idx_vectors_old = data_set->GetLabelIdx();
+ vector<vector<float> > feature_vectors_new;
+ vector<unsigned int> label_idx_vectors_new;
size_t data_set_cnt = 0;
// Write existing feature vectors and its one-hot encoding table with updated label.
- for (unsigned int idx = 0; idx < feature_vectors.size(); ++idx) {
+ for (unsigned int idx = 0; idx < feature_vectors_old.size(); ++idx) {
// Except the data sets with a given target_label_idx.
- if (label_idx_vectors[idx] == target_label_idx)
+ if (label_idx_vectors_old[idx] == target_label_idx)
continue;
// One-hot encoding table should be updated.
// offset 1 : label 3
//
// So if the index of removed label less than remaining index then decrease each index.
- if (label_idx_vectors[idx] > target_label_idx)
- label_idx_vectors[idx]--;
+ if (label_idx_vectors_old[idx] > target_label_idx)
+ label_idx_vectors_old[idx]--;
- fvm_new->WriteFeatureVec(feature_vectors[idx], label_cnt, label_idx_vectors[idx]);
+ feature_vectors_new.push_back(feature_vectors_old[idx]);
+ label_idx_vectors_new.push_back(label_idx_vectors_old[idx]);
data_set_cnt++;
}
- fvm_new->WriteHeader(feature_vectors[0].size(), label_cnt, data_set_cnt);
+ // Retrain only in case that feature vectors exist.
+ if (data_set_cnt > 0) {
+ fvm_new->StoreData(feature_vectors_new, label_idx_vectors_new);
+ fvm_new->WriteHeader(feature_vectors_new[0].size(), label_cnt, data_set_cnt);
- int ret = 0;
-
- if (FaceRecogUtil::IsFileExist(fvm->GetFileName())) {
- // Change new data file to existing one.
- ret = ::remove(fvm->GetFileName().c_str());
- if (ret)
- throw InvalidOperation("Fail to remove feature vector file.");
- }
+ int ret = 0;
- ret = ::rename(fvm_new->GetFileName().c_str(), fvm->GetFileName().c_str());
- if (ret)
- throw InvalidOperation("Fail to rename new feature vector file to original one.");
+ if (FaceRecogUtil::IsFileExist(fvm->GetFileName())) {
+ // Change new data file to existing one.
+ ret = ::remove(fvm->GetFileName().c_str());
+ if (ret)
+ throw InvalidOperation("Fail to remove feature vector file.");
+ }
- if (data_set_cnt == 0) {
- _training_model->RemoveModel();
- fvm->Remove();
- _label_manager->Remove();
+ ret = ::rename(fvm_new->GetFileName().c_str(), fvm->GetFileName().c_str());
+ if (ret)
+ throw InvalidOperation("Fail to rename new feature vector file to original one.");
- LOGD("No training data so removed all relevant files.");
- } else {
_training_model->ConfigureModel(label_cnt);
+ unique_ptr<DataSetManager> new_data_set = CreateDSM(_config.training_engine_backend_type);
+ new_data_set->Clear();
- unique_ptr<DataSetManager> new_data_set;
+ // TODO. Remove existing internal model file.
- UpdateDataSet(new_data_set);
+ new_data_set->LoadDataSet(_config.feature_vector_file_path, label_cnt);
_training_model->ApplyDataSet(new_data_set);
_training_model->Compile();
_training_model->Train();
+
+ // TODO. apply feature vector priority policy here.
+ // We can get weight trained from NNTrainer.
+ // _training_model->getWeights(&weights, &size, "centroid_knn1");
+ } else {
+ _training_model->RemoveModel();
+ fvm->Remove();
+ _label_manager->Remove();
+
+ LOGD("No training data so removed all relevant files.");
}
_status = DELETED;
{
LOGD("signature = %u", fvh.signature);
LOGD("feature vector size = %zu", fvh.feature_size);
- LOGD("one hot encoding table size = %zu", fvh.one_hot_table_size);
+ LOGD("label count = %zu", fvh.label_cnt);
LOGD("data set count = %u", fvh.data_set_cnt);
}
NNTrainerDSM::NNTrainerDSM() : DataSetManager()
{}
-void NNTrainerDSM::LoadDataSet(const string file_name)
+void NNTrainerDSM::AddDataSet(std::vector<float> &feature_vec, const unsigned int label_idx,
+ const unsigned int label_cnt)
+{
+ _data.push_back(feature_vec);
+ _label_index.push_back(label_idx);
+
+ vector<float> oneHotEncoding;
+
+ for (size_t num = 0; num < label_cnt; ++num)
+ oneHotEncoding.push_back(label_idx == num ? 1.0f : 0.0f);
+
+ _labels.push_back(oneHotEncoding);
+ _feature_vector_size = feature_vec.size();
+ _label_count = label_cnt;
+}
+
+void NNTrainerDSM::LoadDataSet(const string file_name, unsigned int new_label_cnt)
{
std::ifstream inFile(file_name);
if (FeatureVectorManager::feature_vector_signature != fvh.signature)
throw InvalidOperation("Wrong feature vector header.");
- size_t line_size_in_bytes = fvh.feature_size * sizeof(float) + fvh.one_hot_table_size * sizeof(float);
+ /*
+ * Each line stored in the feature vector file has the following layout:
+ * ********************************
+ * ____________________________
+ * |feature vector|label index|
+ * ----------------------------
+ */
+ size_t line_size_in_bytes = fvh.feature_size * sizeof(float) + sizeof(unsigned int);
_feature_vector_size = fvh.feature_size;
- _label_size = fvh.one_hot_table_size;
- _data_set_length = line_size_in_bytes;
+ _label_count = fvh.label_cnt;
- vector<float> line_data(fvh.feature_size + fvh.one_hot_table_size);
+ vector<float> line_data(fvh.feature_size + 1);
for (size_t idx = 0; idx < fvh.data_set_cnt; ++idx) {
- inFile.read((char *) line_data.data(), line_size_in_bytes);
+ inFile.read(reinterpret_cast<char *>(line_data.data()), line_size_in_bytes);
vector<float> data;
+
copy_n(line_data.begin(), _feature_vector_size, back_inserter(data));
_data.push_back(data);
- int label_idx = 0;
- vector<float> label;
+ unsigned int label_idx;
- for (size_t num = 0; num < fvh.one_hot_table_size; ++num) {
- if (line_data[fvh.feature_size + num] == 1.0f)
- label_idx = num;
+ memcpy(&label_idx, reinterpret_cast<void *>(line_data.data() + _feature_vector_size),
+ sizeof(unsigned int));
+
+ vector<float> label;
- label.push_back((float) line_data[fvh.feature_size + num]);
- }
+ // The max label count may have changed, so rebuild the one-hot encoding table.
+ for (size_t num = 0; num < new_label_cnt; ++num)
+ label.push_back(label_idx == num ? 1.0f : 0.0f);
_labels.push_back(label);
_label_index.push_back(label_idx);