#include "data_augment_rotate.h"
typedef struct {
- std::string backbone_backend_name;
- inference_target_type_e backbone_target_device_type;
+ training_target_type_e training_target_device_type;
training_engine_backend_type_e training_engine_backend_type;
+ inference_target_type_e inference_target_device_type;
+ inference_backend_type_e inference_engine_backend_type;
+ inference_target_type_e backbone_target_device_type;
+ inference_backend_type_e backbone_engine_backend_type;
std::string backbone_model_file_path;
std::string internal_model_file_path;
std::string label_file_path;
std::vector<std::unique_ptr<DataAugment>> _data_augments;
// FYI. This function should be called every time a new face is registered.
- int Prepare();
+ void ImportLabel();
void CheckFeatureVectorFile(std::unique_ptr<FeatureVectorManager>& old_fvm, std::unique_ptr<FeatureVectorManager>& new_fvm);
std::unique_ptr<DataSetManager> CreateDSM(const training_engine_backend_type_e backend_type);
std::unique_ptr<FeatureVectorManager> CreateFVM(const training_engine_backend_type_e backend_type, std::string file_name);
protected:
bool _initialized;
- bool _prepared;
std::unique_ptr<InferenceEngineHelper> _internal;
std::unique_ptr<InferenceEngineHelper> _backbone;
std::unique_ptr<FaceNetInfo> _face_net_info;
public:
SimpleShot(const training_engine_backend_type_e backend_type = TRAINING_ENGINE_BACKEND_NNTRAINER,
+ const training_target_type_e target_type = TRAINING_TARGET_CPU,
const std::string internal_model_file = "model_and_weights.ini");
~SimpleShot();
using namespace Mediavision::MachineLearning::Exception;
FaceRecognition::FaceRecognition() :
- _initialized(false), _prepared(false), _internal(), _backbone(), _face_net_info(), _training_model(), _label_manager()
+ _initialized(false), _internal(), _backbone(), _face_net_info(), _training_model(), _label_manager()
{
_data_augments.push_back(std::make_unique<DataAugmentDefault>());
/* Add other data augment classes here. */
}
// Initialize inference engine object for backbone model.
- _backbone = make_unique<InferenceEngineHelper>(_config.backbone_backend_name, _config.backbone_target_device_type);
+ _backbone = make_unique<InferenceEngineHelper>(_config.backbone_engine_backend_type,
+ _config.backbone_target_device_type);
vector<string> input_layer_names, output_layer_names;
vector<inference_engine_tensor_info> input_tensor_info, output_tensor_info;
_backbone->Load(_face_net_info->GetModelFilePath());
+ _training_model = make_unique<SimpleShot>(_config.training_engine_backend_type,
+ _config.training_target_device_type,
+ _config.internal_model_file_path);
+
+ _internal = make_unique<InferenceEngineHelper>(_config.inference_engine_backend_type,
+ _config.inference_target_device_type);
+
_initialized = true;
return MEDIA_VISION_ERROR_NONE;
}
-int FaceRecognition::Prepare()
+void FaceRecognition::ImportLabel()
{
- if (!_initialized) {
- LOGE("Initialization not ready yet.");
- return MEDIA_VISION_ERROR_INVALID_OPERATION;
- }
-
try {
	// ImportLabel can be called several times after initialization is done so previous label data should be dropped.
- _training_model.reset();
_label_manager.reset();
- _internal.reset();
-
- _training_model = make_unique<SimpleShot>(_config.training_engine_backend_type, _config.internal_model_file_path);
_label_manager = make_unique<LabelManager>(_config.label_file_path, _config.decision_threshold);
// Update label manager from a given label file.
int cnt = _label_manager->ImportLabel();
LOGD("%d labels have been imported", cnt);
-
- TrainingEngineBackendInfo engine_info = _training_model->GetTrainingEngineInfo();
-
- _internal = make_unique<InferenceEngineHelper>(engine_info.backend_name, engine_info.target_device);
} catch (const BaseException& e) {
LOGE("%s", e.what());
- return e.getError();
+ throw e;
}
-
- _prepared = true;
-
- return MEDIA_VISION_ERROR_NONE;
}
int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
vector<model_layer_info>& output_layer_info = _face_net_info->GetOutputLayerInfo();
vector<float> in_vec;
+ if (!_initialized) {
+ LOGE("Initialization not ready yet.");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
int ret = GetVecFromMvSource(img_src, in_vec);
if (ret != MEDIA_VISION_ERROR_NONE) {
LOGE("Fail to get tensor vector from mv_source.(%d)", ret);
}
for (auto& data_augment : _data_augments) {
- ret = Prepare();
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to prepare face recognition.");
- return ret;
- }
-
try {
+ // Import label data from a label file.
+ ImportLabel();
+
// 1. Store only label names to label file, which aren't duplicated.
bool duplicated = _label_manager->AddLabelToMap(label_name, label_name);
if (!duplicated) {
_training_model->ApplyDataSet(data_set);
_training_model->Compile();
_training_model->Train();
+
+ // The label count can change each time training is performed, in which case the whole
+ // data set is used for training again. So make sure to clear the previous data set before the next training.
+ _training_model->ClearDataSet(data_set);
} catch (const BaseException& e) {
LOGE("%s", e.what());
return e.getError();
int FaceRecognition::RecognizeFace(mv_source_h img_src, vector<float>& out_vec, unsigned int *out_idx)
{
- int ret = Prepare();
- if (ret != MEDIA_VISION_ERROR_NONE) {
- LOGE("Fail to prepare face recognition.");
- return ret;
+ if (!_initialized) {
+ LOGE("Initialization not ready yet.");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
TrainingEngineBackendInfo engine_info = _training_model->GetTrainingEngineInfo();
vector<inference_engine_tensor_info>& output_tensor_info = engine_info.output_tensor_info;
vector<float> in_vec;
- ret = GetVecFromMvSource(img_src, in_vec);
+ int ret = GetVecFromMvSource(img_src, in_vec);
if (ret != MEDIA_VISION_ERROR_NONE) {
LOGE("Fail to get tensor vector from mv_source.(%d)", ret);
return ret;
}
try {
+ // Import label data from a label file.
+ ImportLabel();
+
if (_face_net_info->GetInputLayerInfo().empty() || _face_net_info->GetInputLayerInfo().size() > 1) {
LOGE("Invalid input layer size - input layer size should be 1.");
return MEDIA_VISION_ERROR_INVALID_PARAMETER;
}
FaceRecognitionConfig config = {
- "tflite",
- INFERENCE_TARGET_CPU,
+ TRAINING_TARGET_CPU,
TRAINING_ENGINE_BACKEND_NNTRAINER,
+ INFERENCE_TARGET_CPU,
+ INFERENCE_BACKEND_NNTRAINER,
+ INFERENCE_TARGET_CPU,
+ INFERENCE_BACKEND_TFLITE,
backboneModelFilePath,
string(defaultPath) + "model_and_weights.ini",
string(defaultPath) + "labels.dat",
using namespace TrainingEngineInterface::Common;
using namespace Mediavision::MachineLearning::Exception;
-SimpleShot::SimpleShot(const training_engine_backend_type_e backend_type, const string internal_model_file) :
- TrainingModel(backend_type, internal_model_file)
+SimpleShot::SimpleShot(const training_engine_backend_type_e backend_type,
+ const training_target_type_e target_type,
+ const string internal_model_file) :
+ TrainingModel(backend_type, target_type, internal_model_file)
{
- map<int, string>::iterator item = _backend_lookup.find(backend_type);
- if (item == _backend_lookup.end())
- throw InvalidParameter("Invalid training engine backend type.");
-
- _engine_info.backend_name = item->second;
- _engine_info.target_device = INFERENCE_TARGET_CPU;
+ _engine_info.backend_type = backend_type;
+ // TODO. The training engine interface has no target type attribute yet.
+ // Add a target type to training_engine_config and set this type on the
+ // given training engine backend.
+ _engine_info.target_device = target_type;
const inference_engine_tensor_info nntrainer_input_tensor_info = {
{ 192, 1, 1, 1 },
void SimpleShot::ConfigureModel(int num_of_class)
{
- training_engine_config config = { _engine_info.backend_name };
- int ret = _training->BindBackend(config);
- if (ret != TRAINING_ENGINE_ERROR_NONE)
- throw InvalidOperation("Fail to bind backend engine.");
-
- training_engine_capacity capacity = { TRAINING_TENSOR_SHAPE_MIN };
- ret = _training->GetBackendCapacity(capacity);
- if (ret != TRAINING_ENGINE_ERROR_NONE)
- throw InvalidOperation("Fail to get backend capacity.");
+ // TODO. Check the capacity.
_model = _training->CreateModel();
if (!_model)
// Ps. In case of the first layer, input_shape property is mandatorily required.
// 1:192 is a shape of backbone model output tensor.
training_engine_layer_property l2norm_property = { .options = { "input_shape=1:192", "trainable=false" } };
- ret = _training->SetLayerProperty(l2norm.get(), l2norm_property);
+
+ int ret = _training->SetLayerProperty(l2norm.get(), l2norm_property);
if (ret != TRAINING_ENGINE_ERROR_NONE)
throw InvalidOperation("Fail to set layer propery.");
void CleanupTensorBuffers(IETensorBuffer &inputs, IETensorBuffer &outputs);
public:
- InferenceEngineHelper(std::string backend_name, int target_device);
+ InferenceEngineHelper(int backend_type, int target_device);
~InferenceEngineHelper();
int Load(std::string backbone_path);
int UpdateLayerInfo(const std::vector<std::string>& input_layers,
{ "onnx", INFERENCE_MODEL_ONNX }, {"ini", INFERENCE_MODEL_NNTRAINER }
};
-InferenceEngineHelper::InferenceEngineHelper(string backend_name, int target_device)
+InferenceEngineHelper::InferenceEngineHelper(int backend_type, int target_device)
{
_engine.reset();
_engine = make_unique<InferenceEngineCommon>();
- inference_engine_config config = { backend_name, -1, target_device };
+ inference_engine_config config = { "", backend_type, target_device };
int ret = _engine->BindBackend(&config);
if (ret != INFERENCE_ENGINE_ERROR_NONE)
#include "feature_vector_manager.h"
typedef struct {
- std::string backend_name;
+ int backend_type;
unsigned int target_device;
std::vector<std::string> input_layer_names;
std::vector<std::string> output_layer_names;
std::unique_ptr<TrainingEngineInterface::Common::TrainingEngineCommon> _training;
std::unique_ptr<training_engine_model> _model;
std::unique_ptr<training_engine_dataset> _data_set;
- std::map<int, std::string> _backend_lookup;
std::string _internal_model_file;
public:
TrainingModel(const training_engine_backend_type_e backend_type = TRAINING_ENGINE_BACKEND_NNTRAINER,
+ const training_target_type_e target_type = TRAINING_TARGET_CPU,
const std::string internal_model_file = "model_and_weights.ini");
virtual ~TrainingModel();
void ApplyDataSet(std::unique_ptr<DataSetManager>& data_set);
+ void ClearDataSet(std::unique_ptr<DataSetManager>& data_set);
void Compile();
void Train();
using namespace TrainingEngineInterface::Common;
using namespace Mediavision::MachineLearning::Exception;
-TrainingModel::TrainingModel(const training_engine_backend_type_e backend_type, const string internal_model_file)
+TrainingModel::TrainingModel(const training_engine_backend_type_e backend_type,
+ const training_target_type_e target_type,
+ const string internal_model_file)
{
_internal_model_file = internal_model_file;
-
- _backend_lookup.insert(make_pair<int, string>(TRAINING_ENGINE_BACKEND_NNTRAINER, "nntrainer"));
-
_training = make_unique<TrainingEngineInterface::Common::TrainingEngineCommon>();
+ // TODO. The backend_type and target_type constructor parameters are ignored here; pass them into training_engine_config instead of the hardcoded NNTRAINER/CPU values once the interface supports it.
+ training_engine_config config = { "", TRAINING_BACKEND_NNTRAINER, TRAINING_TARGET_CPU };
+ int ret = _training->BindBackend(&config);
+ if (ret != TRAINING_ENGINE_ERROR_NONE)
+ throw InvalidOperation("Fail to bind backend engine.");
+
+ training_engine_capacity capacity = { TRAINING_TENSOR_SHAPE_MIN };
+ ret = _training->GetBackendCapacity(capacity);
+ if (ret != TRAINING_ENGINE_ERROR_NONE)
+ throw InvalidOperation("Fail to get backend capacity.");
}
+
TrainingModel::~ TrainingModel()
{
if (_training)
throw InvalidOperation("Fail to set dataset to model.", ret);
}
+// Drop all data accumulated in the given data set manager and destroy the
+// training engine's internal dataset handle so that the next training run
+// starts from a clean data set (the label count may have changed since the
+// previous training).
+// NOTE(review): _data_set is destroyed through the raw pointer but the owning
+// unique_ptr is not reset here — confirm the engine's DestroyDataset contract
+// so _data_set's later destruction does not double-free the handle.
+void TrainingModel::ClearDataSet(unique_ptr<DataSetManager>& data_set)
+{
+ data_set->Clear();
+ _training->DestroyDataset(_data_set.get());
+}
+
void TrainingModel::Compile()
{
auto optimizer = _training->CreateOptimizer(TRAINING_OPTIMIZER_TYPE_SGD);
Name: capi-media-vision
Summary: Media Vision library for Tizen Native API
-Version: 0.21.3
+Version: 0.21.4
Release: 0
Group: Multimedia/Framework
License: Apache-2.0 and BSD-3-Clause