cmake_minimum_required(VERSION 2.6...3.13)
pkg_check_modules(${PROJECT_NAME}_DEP REQUIRED inference-engine-interface-common training-engine-interface-common)
-file(GLOB MV_FACE_RECOG_SOURCE_LIST "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp")
+file(GLOB MV_FACE_RECOG_SOURCE_LIST "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/../meta/src/*.cpp")
find_package(OpenCV REQUIRED dnn imgproc)
if(NOT OpenCV_FOUND)
+++ /dev/null
-/**
- * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __FACE_NET_INFO_H__
-#define __FACE_NET_INFO_H__
-
-#include "backbone_model_info.h"
-
-class FaceNetInfo : public IBackboneModelInfo
-{
-private:
- std::vector<model_layer_info> _input_layer_info;
- std::vector<model_layer_info> _output_layer_info;
- std::string _model_file_path;
-
-public:
- FaceNetInfo(std::string model_file_path);
- ~FaceNetInfo();
-
- std::vector<model_layer_info> &GetInputLayerInfo() override;
- std::vector<model_layer_info> &GetOutputLayerInfo() override;
- std::string GetModelFilePath() override;
-};
-
-#endif
\ No newline at end of file
} // face_recognition
-typedef struct {
+struct face_recognition_register_input_s {
+ std::vector<float> data;
+ std::string label;
+};
+
+struct mv_face_recognition_input_s {
unsigned int mode;
- std::unordered_map<mv_source_h, std::string> register_src;
- mv_source_h inference_src;
+ std::vector<face_recognition_register_input_s> register_src;
+ std::vector<std::vector<float>> inputs;
std::vector<std::string> labels;
-} mv_face_recognition_input_s;
+};
/**
* @brief The face recognition result structure.
* @details Contains face recognition result such as label, label index, raw data,
* and raw data count.
*/
-typedef struct {
+struct mv_face_recognition_result_s {
unsigned int label_idx; /**< label index of label file. */
std::vector<float> raw_data; /**< raw data to each label. */
std::string label; /**< label string. */
-} mv_face_recognition_result_s;
+};
-typedef struct {
+struct FaceRecognitionConfig {
mv_inference_target_device_e training_target_device_type;
mv_inference_backend_type_e training_engine_backend_type;
mv_inference_target_device_e inference_target_device_type;
mv_inference_backend_type_e inference_engine_backend_type;
- mv_inference_target_device_e backbone_target_device_type;
- mv_inference_backend_type_e backbone_engine_backend_type;
- std::string backbone_model_file_path;
std::string internal_model_file_path;
std::string label_file_path;
std::string feature_vector_file_path;
double decision_threshold;
-} FaceRecognitionConfig;
+};
class FaceRecognition
{
int Initialize();
void SetConfig(FaceRecognitionConfig &config);
- int RegisterNewFace(mv_source_h img_src, std::string label_name);
- int RecognizeFace(mv_source_h img_src);
+ int RegisterNewFace(std::vector<float> &input_vec, std::string label_name);
+ int RecognizeFace(std::vector<float> &input_vec);
int DeleteLabel(std::string label_name);
int GetLabel(const char **out_label);
mv_face_recognition_result_s &GetResult();
#include "itask.h"
#include "face_recognition.h"
-/**
- * @brief Defines #MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH
- * to set the backbone model file path.
- * @details This model file is used to extract the feature vectors from a given face image data.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
- */
-#define MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH "MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH"
-
-/**
- * @brief Defines #MV_FACE_RECOGNITION_DEFAULT_PATH
- * to set the path where the training relevant files are created.
- * @details This path is used as a default location where the trained model, label and feature vector files are created.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
- */
-#define MV_FACE_RECOGNITION_DEFAULT_PATH "MV_FACE_RECOGNITION_DEFAULT_PATH"
-
-/**
- * @brief Defines #MV_FACE_RECOGNITION_DECISION_THRESHOLD
- * to set the decision threshold file+.
- * @details This file is used to determine face recognition result with a given face image data is true or false..
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
- */
-#define MV_FACE_RECOGNITION_DECISION_THRESHOLD "MV_FACE_RECOGNITION_DECISION_THRESHOLD"
-
namespace mediavision
{
namespace machine_learning
{
private:
std::unique_ptr<FaceRecognition> _face_recognition;
- mv_face_recognition_input_s _source;
+ T _source {};
std::unique_ptr<MediaVision::Common::EngineConfig> _config;
public:
--- /dev/null
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FACE_RECOGNITION_TYPE_H__
+#define __FACE_RECOGNITION_TYPE_H__
+
+#include <vector>
+#include <mv_common.h>
+
+/**
+ * @brief Defines #MV_FACENET_MODEL_FILE_PATH
+ * to set the backbone model file path.
+ * @details This model file is used to extract the feature vectors from a given face image data.
+ *
+ * @since_tizen 7.0
+ * @see mv_engine_config_set_string_attribute()
+ * @see mv_engine_config_get_string_attribute()
+ */
+#define MV_FACENET_MODEL_FILE_PATH "FACENET_MODEL_FILE_PATH"
+
+/**
+ * @brief Defines #MV_FACENET_MODEL_META_FILE_PATH
+ * to set the backbone model meta file path.
+ * @details This model meta file is used to provide input and output tensor info of a given model file.
+ *
+ * @since_tizen 7.0
+ * @see mv_engine_config_set_string_attribute()
+ * @see mv_engine_config_get_string_attribute()
+ */
+#define MV_FACENET_MODEL_META_FILE_PATH "FACENET_MODEL_META_FILE_PATH"
+
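+/**
+ * @brief Defines #MV_FACENET_OUTPUT_TENSOR_NAME
+ * to set the name of the backbone model's output tensor.
+ * @details This tensor contains the feature vector extracted from a given face image data.
+ *
+ * @since_tizen 7.0
+ * @see mv_engine_config_set_string_attribute()
+ * @see mv_engine_config_get_string_attribute()
+ */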
+#define MV_FACENET_OUTPUT_TENSOR_NAME "FACENET_OUTPUT_TENSOR_NAME"
+
+/**
+ * @brief Defines #MV_FACE_RECOGNITION_DEFAULT_PATH
+ * to set the path where the training relevant files are created.
+ * @details This path is used as a default location where the trained model, label and feature vector files are created.
+ *
+ * @since_tizen 7.0
+ * @see mv_engine_config_set_string_attribute()
+ * @see mv_engine_config_get_string_attribute()
+ */
+#define MV_FACE_RECOGNITION_DEFAULT_PATH "FACE_RECOGNITION_DEFAULT_PATH"
+
+#define MV_FACENET_BACKEND_TYPE "FACENET_MODEL_BACKEND_TYPE"
+
+#define MV_FACENET_TARGET_DEVICE_TYPE "FACENET_MODEL_TARGET_DEVICE_TYPE"
+
+#define FACE_RECOGNITION_META_FILE_NAME "face_recognition.json"
+
+/**
+ * @brief Defines #MV_FACE_RECOGNITION_DECISION_THRESHOLD
+ * to set the decision threshold value.
+ * @details This value is used to decide whether the face recognition result for a given face image data is accepted or not.
+ *
+ * @since_tizen 7.0
+ * @see mv_engine_config_set_double_attribute()
+ * @see mv_engine_config_get_double_attribute()
+ */
+#define MV_FACE_RECOGNITION_DECISION_THRESHOLD "FACE_RECOGNITION_DECISION_THRESHOLD"
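+
+// Note: the attribute name strings defined above must match the "name" fields
+// in face_recognition.json, from which EngineConfig reads their values.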
+
+struct facenet_input_s {
+ std::vector<mv_source_h> inputs;
+};
+
+struct facenet_output_s {
+ std::vector<std::vector<float>> outputs;
+};
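+
+// Flow sketch: facenet_input_s carries the caller's mv_source_h handles into
+// the facenet task, and facenet_output_s returns the extracted feature
+// vectors; the first vector is what the face recognition task consumes for
+// register and recognize requests.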
+
+#endif
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FACENET_H__
+#define __FACENET_H__
+
+#include <mv_common.h>
+#include <mv_inference_type.h>
+#include "mv_private.h"
+
+#include "EngineConfig.h"
+#include "inference_engine_common_impl.h"
+#include "Inference.h"
+#include "facenet_parser.h"
+#include "face_recognition_type.h"
+#include "Preprocess.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
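+// Facenet wraps backbone model inference for face recognition:
+// parseMetaFile() reads engine attributes from face_recognition.json,
+// configure() binds the backend engine, prepare() loads the model files,
+// and preprocess()/inference()/getResult() extract one feature vector
+// per request.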
+class Facenet
+{
+protected:
+ std::unique_ptr<mediavision::inference::Inference> _inference;
+ std::unique_ptr<MediaVision::Common::EngineConfig> _config;
+ std::unique_ptr<MetaParser> _parser;
+ facenet_output_s _result;
+ inference_engine_tensor_buffer *_outputTensorBuffer;
+ Preprocess _preprocess;
+ std::string _modelFilePath;
+ std::string _modelMetaFilePath;
+ std::string _facenetOutputTensorName;
+ int _backendType;
+ int _targetDeviceType;
+
+public:
+ Facenet();
+ virtual ~Facenet() = default;
+ void parseMetaFile();
+ void configure();
+ void prepare();
+ void preprocess(mv_source_h &mv_src);
+ void inference(mv_source_h source);
+ facenet_output_s &getResult();
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FACENET_ADAPTER_H__
+#define __FACENET_ADAPTER_H__
+
+#include <dlog.h>
+
+#include "EngineConfig.h"
+#include "itask.h"
+#include "facenet.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
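+// ITask adapter for Facenet. In this patch it is instantiated with
+// T = facenet_input_s and V = facenet_output_s (see the explicit template
+// instantiation in facenet_adapter.cpp).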
+template<typename T, typename V> class FacenetAdapter : public mediavision::common::ITask<T, V>
+{
+private:
+ std::unique_ptr<Facenet> _facenet;
+ T _source;
+
+public:
+ FacenetAdapter();
+ ~FacenetAdapter();
+
+ void create(int type) override;
+
+ void configure() override;
+ void prepare() override;
+ void setInput(T &t) override;
+ void perform() override;
+ V &getOutput() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FACENET_PARSER_H__
+#define __FACENET_PARSER_H__
+
+#include "MetaParser.h"
+#include "PostprocessParser.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+class FacenetParser : public MetaParser
+{
+private:
+ PostprocessParser _postprocessParser;
+
+protected:
+ void parsePostprocess(std::shared_ptr<MetaInfo> meta_info, JsonObject *in_obj) override;
+
+public:
+ FacenetParser();
+ ~FacenetParser();
+};
+
+}
+}
+
+#endif
\ No newline at end of file
"attributes":
[
{
- "name" : "MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH",
+ "name" : "FACENET_MODEL_FILE_PATH",
"type" : "string",
"value" : "/home/owner/media/res/face_recognition/backbone/facenet.tflite"
},
{
- "name" : "MV_FACE_RECOGNITION_DEFAULT_PATH",
+ "name" : "FACENET_MODEL_META_FILE_PATH",
+ "type" : "string",
+ "value" : "/home/owner/media/res/face_recognition/backbone/facenet.json"
+ },
+ {
+ "name" : "FACE_RECOGNITION_DEFAULT_PATH",
"type" : "string",
"value" : "/home/owner/media/res/face_recognition/training/"
},
{
- "name" : "MV_FACE_RECOGNITION_DECISION_THRESHOLD",
+ "name" : "FACENET_MODEL_BACKEND_TYPE",
+ "type" : "integer",
+ "value" : 1
+ },
+ {
+ "name" : "FACENET_MODEL_TARGET_DEVICE_TYPE",
+ "type" : "integer",
+ "value" : 1
+ },
+ {
+ "name" : "FACENET_OUTPUT_TENSOR_NAME",
+ "type" : "string",
+ "value" : "normalize/l2_normalize"
+ },
+ {
+ "name" : "FACE_RECOGNITION_DECISION_THRESHOLD",
"type" : "double",
"value" : -0.85
}
+++ /dev/null
-/**
- * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "face_net_info.h"
-
-using namespace std;
-
-FaceNetInfo::FaceNetInfo(string model_file_path)
-{
- _model_file_path = model_file_path;
-
- const string input_layer_name = { "input_1" };
- const inference_engine_tensor_info input_tensor_info = {
- { 160, 160, 3, 1 }, INFERENCE_TENSOR_SHAPE_NCHW, INFERENCE_TENSOR_DATA_TYPE_FLOAT32, (size_t)(1 * 3 * 160 * 160)
- };
-
- model_layer_info input_info = { input_layer_name, input_tensor_info };
- _input_layer_info.push_back(input_info);
-
- const string output_layer_name = { "normalize/l2_normalize" };
- const inference_engine_tensor_info output_tensor_info = {
- { 512, 1, 1, 1 }, INFERENCE_TENSOR_SHAPE_NCHW, INFERENCE_TENSOR_DATA_TYPE_FLOAT32, (size_t)(1 * 512)
- };
-
- model_layer_info output_info = { output_layer_name, output_tensor_info };
- _output_layer_info.push_back(output_info);
-}
-
-FaceNetInfo::~FaceNetInfo()
-{
- _input_layer_info.clear();
- _output_layer_info.clear();
-}
-
-string FaceNetInfo::GetModelFilePath()
-{
- return _model_file_path;
-}
-
-vector<model_layer_info> &FaceNetInfo::GetInputLayerInfo()
-{
- return _input_layer_info;
-}
-
-vector<model_layer_info> &FaceNetInfo::GetOutputLayerInfo()
-{
- return _output_layer_info;
-}
\ No newline at end of file
#include "face_recognition.h"
#include "nntrainer_fvm.h"
#include "nntrainer_dsm.h"
-#include "face_net_info.h"
#include "file_util.h"
using namespace std;
int FaceRecognition::Initialize()
{
- _backbone_model_info = make_unique<FaceNetInfo>(_config.backbone_model_file_path);
-
- if (_backbone_model_info->GetInputLayerInfo().empty() || _backbone_model_info->GetInputLayerInfo().size() > 1) {
- LOGE("Invalid input layer size - input layer size should be 1.");
- return MEDIA_VISION_ERROR_INVALID_PARAMETER;
- }
-
- if (_backbone_model_info->GetOutputLayerInfo().empty() || _backbone_model_info->GetOutputLayerInfo().size() > 1) {
- LOGE("Invalid output layer size - output layer size should be 1.");
- return MEDIA_VISION_ERROR_INVALID_PARAMETER;
- }
-
- vector<string> input_layer_names, output_layer_names;
-
- for (auto &input : _backbone_model_info->GetInputLayerInfo())
- input_layer_names.push_back(input.layer_name);
-
- for (auto &output : _backbone_model_info->GetOutputLayerInfo())
- output_layer_names.push_back(output.layer_name);
-
- // Initialize inference engine object for backbone model.
- _backbone = make_unique<Inference>();
-
- int ret = _backbone->Bind(_config.backbone_engine_backend_type, _config.backbone_target_device_type);
- if (ret != MEDIA_VISION_ERROR_NONE)
- return ret;
-
- // Tensor order is NCHW.
- vector<model_layer_info> &input_layer_info = GetBackboneInputLayerInfo();
- size_t width = input_layer_info[0].tensor_info.shape[0];
- size_t height = input_layer_info[0].tensor_info.shape[1];
- size_t ch = input_layer_info[0].tensor_info.shape[2];
- vector<inference_engine_tensor_info> output_tensor_info;
-
- _backbone->ConfigureInputInfo(width, height, 1, ch, 127.5f, 127.5f, MV_INFERENCE_DATA_FLOAT32, input_layer_names);
- _backbone->ConfigureOutputInfo(output_layer_names, output_tensor_info);
- _backbone->ConfigureModelFiles("", _backbone_model_info->GetModelFilePath(), "");
-
- ret = _backbone->Load();
- if (ret != MEDIA_VISION_ERROR_NONE)
- return ret;
-
_training_model = make_unique<SimpleShot>(_config.training_engine_backend_type, _config.training_target_device_type,
_config.internal_model_file_path);
_internal = make_unique<Inference>();
- ret = _internal->Bind(_config.inference_engine_backend_type, _config.inference_target_device_type);
+ int ret = _internal->Bind(_config.inference_engine_backend_type, _config.inference_target_device_type);
if (ret != MEDIA_VISION_ERROR_NONE)
return ret;
}
}
-int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
+int FaceRecognition::RegisterNewFace(std::vector<float> &input_vec, string label_name)
{
- vector<model_layer_info> &output_layer_info = _backbone_model_info->GetOutputLayerInfo();
-
if (_status < INITIALIZED) {
LOGE("Initialization not ready yet. (%u)", _status);
return MEDIA_VISION_ERROR_INVALID_OPERATION;
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- std::vector<mv_source_h> backbone_sources { img_src };
- std::vector<mv_rectangle_s> rects;
-
- int ret = _backbone->Run(backbone_sources, rects);
- if (ret != INFERENCE_ENGINE_ERROR_NONE) {
- LOGE("fail to inference backbone model.");
- return MEDIA_VISION_ERROR_INVALID_OPERATION;
- }
-
- // 2. Get feature vector from a given vec through inference engine.
- // Ps. output layer size should be 1.
- TensorBuffer tensorBuffer = _backbone->GetOutputTensorBuffer();
- inference_engine_tensor_buffer *backbone_output_buffer =
- tensorBuffer.getTensorBuffer(output_layer_info[0].layer_name);
- if (!backbone_output_buffer) {
- LOGE("fail to get output tensor buffer.");
- return MEDIA_VISION_ERROR_INVALID_PARAMETER;
- }
-
- vector<float> feature_vec;
- auto buffer = static_cast<float *>(backbone_output_buffer->buffer);
-
- copy(buffer, buffer + backbone_output_buffer->size / sizeof(float), back_inserter(feature_vec));
-
// Get label index and count.
unsigned int label_idx = _label_manager->GetLabelIndex(label_name);
unsigned int label_cnt = _label_manager->GetMaxLabel();
}
// Add new feature vectors.
- data_set->AddDataSet(feature_vec, label_idx, label_cnt);
+ data_set->AddDataSet(input_vec, label_idx, label_cnt);
_training_model->ApplyDataSet(data_set);
_training_model->Compile();
return MEDIA_VISION_ERROR_NONE;
}
-int FaceRecognition::RecognizeFace(mv_source_h img_src)
+int FaceRecognition::RecognizeFace(std::vector<float> &input_vec)
{
if (_status < INITIALIZED) {
LOGE("Initialization not ready yet.(%u)", _status);
// Import label data from a label file.
ImportLabel();
- if (_backbone_model_info->GetInputLayerInfo().empty() || _backbone_model_info->GetInputLayerInfo().size() > 1) {
- LOGE("Invalid input layer size - input layer size should be 1.");
- return MEDIA_VISION_ERROR_INVALID_PARAMETER;
- }
-
- if (_backbone_model_info->GetOutputLayerInfo().empty() ||
- _backbone_model_info->GetOutputLayerInfo().size() > 1) {
- LOGE("Invalid output layer size - output layer size should be 1.");
- return MEDIA_VISION_ERROR_INVALID_PARAMETER;
- }
-
- std::vector<mv_source_h> backbone_sources { img_src };
- std::vector<mv_rectangle_s> backbone_rects;
-
- // Do inference to backbone model to get feature vector.
- int ret = _backbone->Run(backbone_sources, backbone_rects);
- if (ret != INFERENCE_ENGINE_ERROR_NONE) {
- LOGE("fail to inference backbone model.");
- return MEDIA_VISION_ERROR_INVALID_OPERATION;
- }
-
- // Get output layer info for facenet model.
- vector<model_layer_info> &output_layer_info = _backbone_model_info->GetOutputLayerInfo();
- // Get output tensor buffer to the output layer.
- TensorBuffer tensorBuffer = _backbone->GetOutputTensorBuffer();
- inference_engine_tensor_buffer *backbone_output_buffer =
- tensorBuffer.getTensorBuffer(output_layer_info[0].layer_name);
- if (!backbone_output_buffer) {
- LOGE("fail to get backbone output tensor buffer.");
- return MEDIA_VISION_ERROR_INVALID_PARAMETER;
- }
-
// Tensor order is NCHW.
size_t width = input_tensor_info[0].shape[0];
size_t height = input_tensor_info[0].shape[1];
_internal->ConfigureModelFiles("", _config.internal_model_file_path, "");
// Load the trained internal model.
- ret = _internal->Load();
+ int ret = _internal->Load();
if (ret != INFERENCE_ENGINE_ERROR_NONE) {
LOGE("Fail to Load.");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- std::vector<void *> raw_buffers { static_cast<void *>(backbone_output_buffer) };
+ std::vector<std::vector<float>> input_tensors = { input_vec };
// Do inference to the internal model.
- ret = _internal->Run(raw_buffers);
+ ret = _internal->Run(input_tensors);
if (ret != INFERENCE_ENGINE_ERROR_NONE) {
LOGE("fail to inference internal model.");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
// output layer size should be 1.
- tensorBuffer = _internal->GetOutputTensorBuffer();
+ TensorBuffer tensorBuffer = _internal->GetOutputTensorBuffer();
inference_engine_tensor_buffer *internal_output_buffer = tensorBuffer.getTensorBuffer(output_layers[0]);
if (!internal_output_buffer) {
LOGE("fail to get internal output tensor buffer.");
#include "machine_learning_exception.h"
#include "face_recognition_adapter.h"
-
-#define FACE_RECOGNITION_META_FILE_NAME "face_recognition.json"
+#include "face_recognition_type.h"
using namespace std;
using namespace MediaVision::Common;
template<typename T, typename V> void FaceRecognitionAdapter<T, V>::configure()
{
_config = make_unique<EngineConfig>(string(MV_CONFIG_PATH) + string(FACE_RECOGNITION_META_FILE_NAME));
- string backboneModelFilePath;
- int ret = _config->getStringAttribute(string(MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH), &backboneModelFilePath);
- if (ret != MEDIA_VISION_ERROR_NONE)
- throw InvalidParameter("Failed to get an attribute");
-
- LOGD("Backbone model file path : %s", backboneModelFilePath.c_str());
string defaultPath;
- ret = _config->getStringAttribute(string(MV_FACE_RECOGNITION_DEFAULT_PATH), &defaultPath);
+ int ret = _config->getStringAttribute(string(MV_FACE_RECOGNITION_DEFAULT_PATH), &defaultPath);
if (ret != MEDIA_VISION_ERROR_NONE)
throw InvalidOperation("Fail to get default path.");
MV_INFERENCE_BACKEND_NNTRAINER, // not used and default type is used. See TrainingModel()
MV_INFERENCE_TARGET_DEVICE_CPU,
MV_INFERENCE_BACKEND_NNTRAINER,
- MV_INFERENCE_TARGET_DEVICE_CPU,
- MV_INFERENCE_BACKEND_TFLITE,
- backboneModelFilePath,
string(defaultPath) + "model_and_weights.ini",
string(defaultPath) + "labels.dat",
string(defaultPath) + "feature_vector_file.dat",
{
if (_source.mode == mode::REGISTER) {
for (auto &s : _source.register_src) {
- int ret = _face_recognition->RegisterNewFace(s.first, s.second);
+ int ret = _face_recognition->RegisterNewFace(s.data, s.label);
if (ret != MEDIA_VISION_ERROR_NONE)
throw InvalidOperation("Fail to register new face.");
}
}
if (_source.mode == mode::INFERENCE) {
- int ret = _face_recognition->RecognizeFace(_source.inference_src);
+		// _source.inputs.size() should be 1 here; only the first extracted
+		// feature vector is used.
+ int ret = _face_recognition->RecognizeFace(_source.inputs[0]);
if (ret == MEDIA_VISION_ERROR_NO_DATA)
throw NoData("Label not found.");
--- /dev/null
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <map>
+#include <memory>
+#include <algorithm>
+
+#include "machine_learning_exception.h"
+#include "facenet.h"
+#include "face_recognition_type.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+Facenet::Facenet() : _outputTensorBuffer(), _backendType(), _targetDeviceType()
+{
+ _inference = make_unique<Inference>();
+ _parser = make_unique<FacenetParser>();
+}
+
+static bool IsJsonFile(const string &fileName)
+{
+ return (!fileName.substr(fileName.find_last_of(".") + 1).compare("json"));
+}
+
+void Facenet::parseMetaFile()
+{
+ _config = make_unique<EngineConfig>(string(MV_CONFIG_PATH) + string(FACE_RECOGNITION_META_FILE_NAME));
+
+ int ret = _config->getIntegerAttribute(string(MV_FACENET_BACKEND_TYPE), &_backendType);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw InvalidOperation("Fail to get backend engine type.");
+
+ ret = _config->getIntegerAttribute(string(MV_FACENET_TARGET_DEVICE_TYPE), &_targetDeviceType);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw InvalidOperation("Fail to get target device type.");
+
+ ret = _config->getStringAttribute(MV_FACENET_MODEL_FILE_PATH, &_modelFilePath);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw InvalidOperation("Fail to get model file path.");
+
+ ret = _config->getStringAttribute(MV_FACENET_MODEL_META_FILE_PATH, &_modelMetaFilePath);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw InvalidOperation("Fail to get model meta file path.");
+
+ ret = _config->getStringAttribute(MV_FACENET_OUTPUT_TENSOR_NAME, &_facenetOutputTensorName);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw InvalidOperation("Fail to get facenet output tensor name.");
+
+ if (_modelMetaFilePath.empty())
+		throw InvalidOperation("Model meta file path is empty.");
+
+ if (!IsJsonFile(_modelMetaFilePath))
+ throw InvalidOperation("Model meta file should be json.");
+
+ _parser->load(_modelMetaFilePath);
+}
+
+void Facenet::configure()
+{
+ int ret = _inference->Bind(_backendType, _targetDeviceType);
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw InvalidOperation("Fail to bind a backend engine.");
+}
+
+void Facenet::prepare()
+{
+ int ret = _inference->configureInputMetaInfo(_parser->getInputMetaMap());
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw InvalidOperation("Fail to configure input tensor info from meta file.");
+
+ ret = _inference->configureOutputMetaInfo(_parser->getOutputMetaMap());
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw InvalidOperation("Fail to configure output tensor info from meta file.");
+
+ _inference->ConfigureModelFiles("", _modelFilePath, "");
+
+ // Request to load model files to a backend engine.
+ ret = _inference->Load();
+ if (ret != MEDIA_VISION_ERROR_NONE)
+ throw InvalidOperation("Fail to load model files.");
+}
+void Facenet::preprocess(mv_source_h &mv_src)
+{
+ LOGI("ENTER");
+
+ TensorBuffer &tensor_buffer_obj = _inference->getInputTensorBuffer();
+ IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
+ vector<mv_source_h> mv_srcs = { mv_src };
+
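+	// Preprocess decodes and normalizes the given mv_source_h and writes the
+	// result directly into the backend engine's input tensor buffers.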
+ _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
+
+ LOGI("LEAVE");
+}
+
+void Facenet::inference(mv_source_h source)
+{
+	LOGI("ENTER");
+
+	// The input tensors were already filled by preprocess(), so the source
+	// handle needs no further handling here.
+	int ret = _inference->Run();
+	if (ret != MEDIA_VISION_ERROR_NONE)
+		throw InvalidOperation("Fail to run inference.");
+
+	LOGI("LEAVE");
+}
+
+facenet_output_s &Facenet::getResult()
+{
+ TensorBuffer &tensor_buffer_obj = _inference->GetOutputTensorBuffer();
+ IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
+
+	// Clear _result.outputs before collecting a new output. Otherwise stale
+	// output vectors would accumulate and a wrong feature vector could be
+	// passed to the face recognition framework.
+ _result.outputs.clear();
+
+	for (auto &output : ie_tensor_buffer) {
+		if (output.first.compare(_facenetOutputTensorName) == 0) {
+			_outputTensorBuffer = tensor_buffer_obj.getTensorBuffer(output.first);
+ if (!_outputTensorBuffer)
+ throw InvalidOperation("Fail to get output tensor buffer.");
+
+ vector<float> output_vector;
+ float *buffer = reinterpret_cast<float *>(_outputTensorBuffer->buffer);
+
+ copy(&buffer[0], &buffer[_outputTensorBuffer->size / sizeof(float)], back_inserter(output_vector));
+ _result.outputs.push_back(output_vector);
+
+ return _result;
+ }
+ }
+
+ throw InvalidOperation("No output tensor.");
+}
+
+}
+}
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "machine_learning_exception.h"
+#include "facenet_adapter.h"
+
+using namespace std;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+template<typename T, typename V> FacenetAdapter<T, V>::FacenetAdapter() : _source()
+{
+ _facenet = make_unique<Facenet>();
+}
+
+template<typename T, typename V> FacenetAdapter<T, V>::~FacenetAdapter()
+{}
+
+template<typename T, typename V> void FacenetAdapter<T, V>::create(int type)
+{
+	throw InvalidOperation("Not supported yet.");
+}
+
+template<typename T, typename V> void FacenetAdapter<T, V>::configure()
+{
+ try {
+ _facenet->parseMetaFile();
+ _facenet->configure();
+ } catch (const BaseException &e) {
+ throw e;
+ }
+}
+
+template<typename T, typename V> void FacenetAdapter<T, V>::prepare()
+{
+ try {
+ _facenet->prepare();
+ } catch (const BaseException &e) {
+ throw e;
+ }
+}
+
+template<typename T, typename V> void FacenetAdapter<T, V>::setInput(T &t)
+{
+ _source = t;
+}
+
+template<typename T, typename V> void FacenetAdapter<T, V>::perform()
+{
+ try {
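+		// The inference engine accepts only one input source per request,
+		// so just the first input is used here.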
+ _facenet->preprocess(_source.inputs[0]);
+ _facenet->inference(_source.inputs[0]);
+ } catch (const BaseException &e) {
+ throw e;
+ }
+}
+
+template<typename T, typename V> V &FacenetAdapter<T, V>::getOutput()
+{
+ return _facenet->getResult();
+}
+
+template class FacenetAdapter<facenet_input_s, facenet_output_s>;
+}
+}
\ No newline at end of file
--- /dev/null
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "machine_learning_exception.h"
+#include "facenet_parser.h"
+
+using namespace std;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+FacenetParser::FacenetParser()
+{
+ LOGI("ENTER");
+ LOGI("LEAVE");
+}
+
+FacenetParser::~FacenetParser()
+{}
+
+void FacenetParser::parsePostprocess(shared_ptr<MetaInfo> meta_info, JsonObject *in_obj)
+{
+ LOGI("ENTER");
+
+ LOGI("tensor name : %s", meta_info->name.c_str());
+
+ if (json_object_has_member(in_obj, "box"))
+ _postprocessParser.parseBox(meta_info, in_obj);
+
+ LOGI("LEAVE");
+}
+
+}
+}
\ No newline at end of file
#include <memory>
#include "face_recognition_adapter.h"
+#include "facenet_adapter.h"
#include "mv_face_recognition_open.h"
#include "machine_learning_exception.h"
#include "context.h"
using namespace mediavision::machine_learning::face_recognition;
using namespace mediavision::machine_learning::exception;
using FaceRecognitionTask = ITask<mv_face_recognition_input_s, mv_face_recognition_result_s>;
+using FacenetTask = ITask<facenet_input_s, facenet_output_s>;
int mv_face_recognition_create_open(mv_face_recognition_h *handle)
{
return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
}
- FaceRecognitionTask *task = new (nothrow)
+ FaceRecognitionTask *face_recognition_task = new (nothrow)
FaceRecognitionAdapter<mv_face_recognition_input_s, mv_face_recognition_result_s>();
- if (!task) {
+ if (!face_recognition_task) {
+ delete context;
+ LOGE("Fail to allocate a task.");
+ return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
+ }
+
+ FacenetTask *facenet_task = new (nothrow)
+ FacenetAdapter<facenet_input_s, facenet_output_s>();
+ if (!facenet_task) {
+ delete face_recognition_task;
delete context;
LOGE("Fail to allocate a task.");
return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
pair<map<string, void *>::iterator, bool> result;
- result = context->__tasks.insert(pair<string, void *>("face_recognition", task));
+ result = context->__tasks.insert(pair<string, void *>("face_recognition", face_recognition_task));
if (!result.second) {
+ delete facenet_task;
+ delete face_recognition_task;
+ delete context;
+ LOGE("Fail to register a new task. Same task already exists.");
+ return MEDIA_VISION_ERROR_INVALID_OPERATION;
+ }
+
+ result = context->__tasks.insert(pair<string, void *>("facenet", facenet_task));
+ if (!result.second) {
+ delete facenet_task;
+ delete face_recognition_task;
delete context;
LOGE("Fail to register a new task. Same task already exists.");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
map<string, void *>::iterator iter;
for (iter = context->__tasks.begin(); iter != context->__tasks.end(); ++iter) {
- auto task = static_cast<FaceRecognitionTask *>(iter->second);
- delete task;
+ if (iter->first.compare("face_recognition") == 0) {
+ auto face_recognition_task = static_cast<FaceRecognitionTask *>(iter->second);
+ delete face_recognition_task;
+ }
+
+ if (iter->first.compare("facenet") == 0) {
+ auto facenet_task = static_cast<FacenetTask *>(iter->second);
+ delete facenet_task;
+ }
}
delete context;
try {
Context *context = static_cast<Context *>(handle);
- auto task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+ auto face_recognition_task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+ auto facenet_task = static_cast<FacenetTask *>(context->__tasks["facenet"]);
- task->configure();
- task->prepare();
+ face_recognition_task->configure();
+ facenet_task->configure();
+ face_recognition_task->prepare();
+ facenet_task->prepare();
} catch (const BaseException &e) {
LOGE("%s", e.what());
return e.getError();
try {
Context *context = static_cast<Context *>(handle);
- auto task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+ auto face_recognition_task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+ auto facenet_task = static_cast<FacenetTask *>(context->__tasks["facenet"]);
+
+ facenet_input_s facenet_input = { { source } };
- mv_face_recognition_input_s input = { mode::REGISTER };
+ facenet_task->setInput(facenet_input);
+ facenet_task->perform();
- input.register_src.clear();
- input.register_src.insert(make_pair(source, string(label)));
- task->setInput(input);
- task->perform();
+ facenet_output_s &facenet_output = facenet_task->getOutput();
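+	// Hand the feature vector extracted by facenet, together with the user
+	// label, over to the face recognition task.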
+ mv_face_recognition_input_s face_recognition_input = { .mode = mode::REGISTER };
+ face_recognition_register_input_s facenet_to_face_reg_input = { facenet_output.outputs[0], string(label) };
+
+ face_recognition_input.register_src.clear();
+ face_recognition_input.register_src.push_back(facenet_to_face_reg_input);
+ face_recognition_task->setInput(face_recognition_input);
+ face_recognition_task->perform();
} catch (const BaseException &e) {
LOGE("%s", e.what());
return e.getError();
try {
Context *context = static_cast<Context *>(handle);
- auto task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+ auto face_recognition_task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
mv_face_recognition_input_s input = { mode::DELETE };
input.labels.clear();
input.labels.push_back(string(label));
- task->setInput(input);
- task->perform();
+ face_recognition_task->setInput(input);
+ face_recognition_task->perform();
} catch (const BaseException &e) {
LOGE("%s", e.what());
return e.getError();
try {
Context *context = static_cast<Context *>(handle);
- auto task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+ auto face_recognition_task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+ auto facenet_task = static_cast<FacenetTask *>(context->__tasks["facenet"]);
+
+ facenet_input_s facenet_input = { { source } };
+
+ facenet_task->setInput(facenet_input);
+ facenet_task->perform();
+ facenet_output_s &facenet_output = facenet_task->getOutput();
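+	// Recognize with the feature vector from facenet instead of a raw image.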
- mv_face_recognition_input_s input = { mode::INFERENCE };
+ mv_face_recognition_input_s face_recognition_input = { mode::INFERENCE };
- input.inference_src = source;
- task->setInput(input);
- task->perform();
+ face_recognition_input.inputs = facenet_output.outputs;
+ face_recognition_task->setInput(face_recognition_input);
+ face_recognition_task->perform();
} catch (const BaseException &e) {
LOGE("%s", e.what());
return e.getError();
try {
Context *context = static_cast<Context *>(handle);
- auto task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+ auto face_recognition_task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
- *out_label = task->getOutput().label.c_str();
+ *out_label = face_recognition_task->getOutput().label.c_str();
} catch (const BaseException &e) {
LOGE("%s", e.what());
return e.getError();
*/
int Run(std::vector<mv_source_h> &mvSources, std::vector<mv_rectangle_s> &rects);
- /**
- * @brief Runs inference with a region of a given image
- * @details Use this function to run forward pass with the given image.
- * The given image is preprocessed and the region of the image is
- * thrown to neural network. Then, the output tensor is returned.
- * If roi is NULL, then full source will be analyzed.
- *
- * @since_tizen 7.0
- * @return @c true on success, otherwise a negative error value
- */
- int Run(std::vector<void *> &buffer_objs);
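+	/**
+	 * @brief Runs inference with the given raw input tensors.
+	 * @details Use this function to run forward pass with user-given
+	 *          float tensors instead of mv_source_h handles.
+	 *          Only one input tensor is allowed as of now.
+	 *
+	 * @since_tizen 7.0
+	 * @return @c 0 on success, otherwise a negative error value
+	 */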
+ int Run(std::vector<std::vector<float>> &input_tensors);
int Run();
return ConvertOutputDataTypeToFloat();
}
-int Inference::Run(std::vector<void *> &buffer_objs)
+int Inference::Run(std::vector<std::vector<float>> &input_tensors)
{
int ret = INFERENCE_ENGINE_ERROR_NONE;
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- if (buffer_objs.empty()) {
+ if (input_tensors.empty()) {
LOGE("cvSources should contain only one cv source.");
return MEDIA_VISION_ERROR_INVALID_PARAMETER;
}
// We are able to request Only one input data for the inference as of now.
- if (buffer_objs.size() > 1) {
+ if (input_tensors.size() > 1) {
LOGE("It allows only one source for the inference.");
return MEDIA_VISION_ERROR_INVALID_PARAMETER;
}
- if (mInputTensorBuffers.getIETensorBuffer().size() != buffer_objs.size()) {
+ if (mInputTensorBuffers.getIETensorBuffer().size() != input_tensors.size()) {
LOGE("Raw source count is not invalid.");
return MEDIA_VISION_ERROR_INVALID_PARAMETER;
}
for (auto &buffer : mInputTensorBuffers.getIETensorBuffer()) {
inference_engine_tensor_buffer &tensor_buffer = buffer.second;
- inference_engine_tensor_buffer *buffer_obj =
- static_cast<inference_engine_tensor_buffer *>(buffer_objs[buffer_idx]);
+ std::vector<float> &input_tensor = input_tensors[buffer_idx];
+ const size_t input_tensor_size = input_tensor.size() * sizeof(float);
- if (tensor_buffer.size != buffer_obj->size) {
- LOGE("Raw buffer size is invalid.");
+ if (tensor_buffer.size != input_tensor_size) {
+		LOGE("Raw buffer size is invalid. (%zu vs %zu)", tensor_buffer.size, input_tensor_size);
return MEDIA_VISION_ERROR_INVALID_PARAMETER;
}
- LOGI("A number of tensor bytes : %zu", buffer_obj->size);
+	LOGI("Number of tensor bytes : %zu", input_tensor_size);
- memcpy(tensor_buffer.buffer, buffer_obj->buffer, tensor_buffer.size);
+ memcpy(tensor_buffer.buffer, input_tensor.data(), input_tensor_size);
buffer_idx++;
}