mv_machine_learning: code refactoring to face recognition sandbox/inki.dae/face_recognition_refactoring
author Inki Dae <inki.dae@samsung.com>
Mon, 12 Dec 2022 09:37:29 +0000 (18:37 +0900)
committer Inki Dae <inki.dae@samsung.com>
Fri, 23 Dec 2022 06:56:53 +0000 (15:56 +0900)
Change-Id: Ia38c4dc9c45b3216d88acf5f80befd2c02da2aad
Signed-off-by: Inki Dae <inki.dae@samsung.com>
18 files changed:
mv_machine_learning/face_recognition/CMakeLists.txt
mv_machine_learning/face_recognition/include/face_net_info.h [deleted file]
mv_machine_learning/face_recognition/include/face_recognition.h
mv_machine_learning/face_recognition/include/face_recognition_adapter.h
mv_machine_learning/face_recognition/include/face_recognition_type.h [new file with mode: 0644]
mv_machine_learning/face_recognition/include/facenet.h [new file with mode: 0644]
mv_machine_learning/face_recognition/include/facenet_adapter.h [new file with mode: 0644]
mv_machine_learning/face_recognition/include/facenet_parser.h [new file with mode: 0644]
mv_machine_learning/face_recognition/meta/face_recognition.json
mv_machine_learning/face_recognition/src/face_net_info.cpp [deleted file]
mv_machine_learning/face_recognition/src/face_recognition.cpp
mv_machine_learning/face_recognition/src/face_recognition_adapter.cpp
mv_machine_learning/face_recognition/src/facenet.cpp [new file with mode: 0644]
mv_machine_learning/face_recognition/src/facenet_adapter.cpp [new file with mode: 0644]
mv_machine_learning/face_recognition/src/facenet_parser.cpp [new file with mode: 0644]
mv_machine_learning/face_recognition/src/mv_face_recognition_open.cpp
mv_machine_learning/inference/include/Inference.h
mv_machine_learning/inference/src/Inference.cpp

index c51d75d034a5b908160e0b1ba3acc6f9e2a0be28..6059e4f2f1ea950da27d494fe281362eebb1c161 100644 (file)
@@ -2,7 +2,7 @@ project(${MV_FACE_RECOG_LIB_NAME})
 cmake_minimum_required(VERSION 2.6...3.13)
 
 pkg_check_modules(${PROJECT_NAME}_DEP REQUIRED inference-engine-interface-common training-engine-interface-common)
-file(GLOB MV_FACE_RECOG_SOURCE_LIST  "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp")
+file(GLOB MV_FACE_RECOG_SOURCE_LIST  "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp" "${PROJECT_SOURCE_DIR}/../meta/src/*.cpp")
 
 find_package(OpenCV REQUIRED dnn imgproc)
 if(NOT OpenCV_FOUND)
diff --git a/mv_machine_learning/face_recognition/include/face_net_info.h b/mv_machine_learning/face_recognition/include/face_net_info.h
deleted file mode 100644 (file)
index 8e54704..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __FACE_NET_INFO_H__
-#define __FACE_NET_INFO_H__
-
-#include "backbone_model_info.h"
-
-class FaceNetInfo : public IBackboneModelInfo
-{
-private:
-       std::vector<model_layer_info> _input_layer_info;
-       std::vector<model_layer_info> _output_layer_info;
-       std::string _model_file_path;
-
-public:
-       FaceNetInfo(std::string model_file_path);
-       ~FaceNetInfo();
-
-       std::vector<model_layer_info> &GetInputLayerInfo() override;
-       std::vector<model_layer_info> &GetOutputLayerInfo() override;
-       std::string GetModelFilePath() override;
-};
-
-#endif
\ No newline at end of file
index 0a4620ac31ddc8108bfc8acb13c60759aa37c88b..b0e70be403030352c181da7a9053fd089f038693 100644 (file)
@@ -50,37 +50,39 @@ enum { REGISTER = 0, INFERENCE, DELETE };
 
 } // face_recognition
 
-typedef struct {
+struct face_recognition_register_input_s {
+       std::vector<float> data;
+       std::string label;
+};
+
+struct mv_face_recognition_input_s {
        unsigned int mode;
-       std::unordered_map<mv_source_h, std::string> register_src;
-       mv_source_h inference_src;
+       std::vector<face_recognition_register_input_s> register_src;
+       std::vector<std::vector<float>> inputs;
        std::vector<std::string> labels;
-} mv_face_recognition_input_s;
+};
 
 /**
  * @brief The face recognition result structure.
  * @details Contains face recognition result such as label, label index, raw data,
  *          and raw data count.
  */
-typedef struct {
+struct mv_face_recognition_result_s {
        unsigned int label_idx; /**< label index of label file. */
        std::vector<float> raw_data; /**< raw data to each label. */
        std::string label; /**< label string. */
-} mv_face_recognition_result_s;
+};
 
-typedef struct {
+struct FaceRecognitionConfig {
        mv_inference_target_device_e training_target_device_type;
        mv_inference_backend_type_e training_engine_backend_type;
        mv_inference_target_device_e inference_target_device_type;
        mv_inference_backend_type_e inference_engine_backend_type;
-       mv_inference_target_device_e backbone_target_device_type;
-       mv_inference_backend_type_e backbone_engine_backend_type;
-       std::string backbone_model_file_path;
        std::string internal_model_file_path;
        std::string label_file_path;
        std::string feature_vector_file_path;
        double decision_threshold;
-} FaceRecognitionConfig;
+};
 
 class FaceRecognition
 {
@@ -113,8 +115,8 @@ public:
 
        int Initialize();
        void SetConfig(FaceRecognitionConfig &config);
-       int RegisterNewFace(mv_source_h img_src, std::string label_name);
-       int RecognizeFace(mv_source_h img_src);
+       int RegisterNewFace(std::vector<float> &input_vec, std::string label_name);
+       int RecognizeFace(std::vector<float> &input_vec);
        int DeleteLabel(std::string label_name);
        int GetLabel(const char **out_label);
        mv_face_recognition_result_s &GetResult();
index d77d938f0fb527555ef473b28d9d10cf5e465fe5..c63c38481662177a82862c1e35673d4ad670650b 100644 (file)
 #include "itask.h"
 #include "face_recognition.h"
 
-/**
- * @brief Defines #MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH
- *        to set the backbone model file path.
- * @details This model file is used to extract the feature vectors from a given face image data.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
- */
-#define MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH "MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH"
-
-/**
- * @brief Defines #MV_FACE_RECOGNITION_DEFAULT_PATH
- *        to set the path where the training relevant files are created.
- * @details This path is used as a default location where the trained model, label and feature vector files are created.
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
- */
-#define MV_FACE_RECOGNITION_DEFAULT_PATH "MV_FACE_RECOGNITION_DEFAULT_PATH"
-
-/**
- * @brief Defines #MV_FACE_RECOGNITION_DECISION_THRESHOLD
- *        to set the decision threshold file+.
- * @details This file is used to determine face recognition result with a given face image data is true or false..
- *
- * @since_tizen 7.0
- * @see mv_engine_config_set_string_attribute()
- * @see mv_engine_config_get_string_attribute()
- */
-#define MV_FACE_RECOGNITION_DECISION_THRESHOLD "MV_FACE_RECOGNITION_DECISION_THRESHOLD"
-
 namespace mediavision
 {
 namespace machine_learning
@@ -64,7 +31,7 @@ template<typename T, typename V> class FaceRecognitionAdapter : public mediavisi
 {
 private:
        std::unique_ptr<FaceRecognition> _face_recognition;
-       mv_face_recognition_input_s _source;
+       T _source {};
        std::unique_ptr<MediaVision::Common::EngineConfig> _config;
 
 public:
diff --git a/mv_machine_learning/face_recognition/include/face_recognition_type.h b/mv_machine_learning/face_recognition/include/face_recognition_type.h
new file mode 100644 (file)
index 0000000..b3167d9
--- /dev/null
@@ -0,0 +1,82 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FACE_RECOGNITION_TYPE_H__
+#define __FACE_RECOGNITION_TYPE_H__
+
+#include <vector>
+
+/**
+ * @brief Defines #MV_FACENET_MODEL_FILE_PATH
+ *        to set the backbone model file path.
+ * @details This model file is used to extract the feature vectors from a given face image data.
+ *
+ * @since_tizen 7.0
+ * @see mv_engine_config_set_string_attribute()
+ * @see mv_engine_config_get_string_attribute()
+ */
+#define MV_FACENET_MODEL_FILE_PATH "FACENET_MODEL_FILE_PATH"
+
+/**
+ * @brief Defines #MV_FACENET_MODEL_META_FILE_PATH
+ *        to set the backbone model meta file path.
+ * @details This model meta file is used to provide input and output tensor info of a given model file.
+ *
+ * @since_tizen 7.0
+ * @see mv_engine_config_set_string_attribute()
+ * @see mv_engine_config_get_string_attribute()
+ */
+#define MV_FACENET_MODEL_META_FILE_PATH "FACENET_MODEL_META_FILE_PATH"
+
+#define MV_FACENET_OUTPUT_TENSOR_NAME "FACENET_OUTPUT_TENSOR_NAME"
+
+/**
+ * @brief Defines #MV_FACE_RECOGNITION_DEFAULT_PATH
+ *        to set the path where the training relevant files are created.
+ * @details This path is used as a default location where the trained model, label and feature vector files are created.
+ *
+ * @since_tizen 7.0
+ * @see mv_engine_config_set_string_attribute()
+ * @see mv_engine_config_get_string_attribute()
+ */
+#define MV_FACE_RECOGNITION_DEFAULT_PATH "FACE_RECOGNITION_DEFAULT_PATH"
+
+#define MV_FACENET_BACKEND_TYPE "FACENET_MODEL_BACKEND_TYPE"
+
+#define MV_FACENET_TARGET_DEVICE_TYPE "FACENET_MODEL_TARGET_DEVICE_TYPE"
+
+#define FACE_RECOGNITION_META_FILE_NAME "face_recognition.json"
+
+/**
+ * @brief Defines #MV_FACE_RECOGNITION_DECISION_THRESHOLD
+ *        to set the decision threshold value.
+ * @details This value is used to determine whether the face recognition result for a given face image is true or false.
+ *
+ * @since_tizen 7.0
+ * @see mv_engine_config_set_string_attribute()
+ * @see mv_engine_config_get_string_attribute()
+ */
+#define MV_FACE_RECOGNITION_DECISION_THRESHOLD "FACE_RECOGNITION_DECISION_THRESHOLD"
+
+struct facenet_input_s {
+       std::vector<mv_source_h> inputs;
+};
+
+struct facenet_output_s {
+       std::vector<std::vector<float>> outputs;
+};
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/face_recognition/include/facenet.h b/mv_machine_learning/face_recognition/include/facenet.h
new file mode 100644 (file)
index 0000000..ec5cda4
--- /dev/null
@@ -0,0 +1,65 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FACENET_H__
+#define __FACENET_H__
+
+#include <mv_common.h>
+#include <mv_inference_type.h>
+#include "mv_private.h"
+
+#include "EngineConfig.h"
+#include "inference_engine_common_impl.h"
+#include "Inference.h"
+#include "facenet_parser.h"
+#include "face_recognition_type.h"
+#include "Preprocess.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+class Facenet
+{
+protected:
+       std::unique_ptr<mediavision::inference::Inference> _inference;
+       std::unique_ptr<MediaVision::Common::EngineConfig> _config;
+       std::unique_ptr<MetaParser> _parser;
+       facenet_output_s _result;
+       inference_engine_tensor_buffer *_outputTensorBuffer;
+       Preprocess _preprocess;
+       std::string _modelFilePath;
+       std::string _modelMetaFilePath;
+       std::string _facenetOutputTensorName;
+       int _backendType;
+       int _targetDeviceType;
+
+public:
+       Facenet();
+       virtual ~Facenet() = default;
+       void parseMetaFile();
+       void configure();
+       void prepare();
+       void preprocess(mv_source_h &mv_src);
+       void inference(mv_source_h source);
+       facenet_output_s &getResult();
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/face_recognition/include/facenet_adapter.h b/mv_machine_learning/face_recognition/include/facenet_adapter.h
new file mode 100644 (file)
index 0000000..2d5bd90
--- /dev/null
@@ -0,0 +1,53 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FACENET_ADAPTER_H__
+#define __FACENET_ADAPTER_H__
+
+#include <dlog.h>
+
+#include "EngineConfig.h"
+#include "itask.h"
+#include "facenet.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+
+template<typename T, typename V> class FacenetAdapter : public mediavision::common::ITask<T, V>
+{
+private:
+       std::unique_ptr<Facenet> _facenet;
+       T _source;
+
+public:
+       FacenetAdapter();
+       ~FacenetAdapter();
+
+       void create(int type) override;
+
+       void configure() override;
+       void prepare() override;
+       void setInput(T &t) override;
+       void perform() override;
+       V &getOutput() override;
+};
+
+} // machine_learning
+} // mediavision
+
+#endif
\ No newline at end of file
diff --git a/mv_machine_learning/face_recognition/include/facenet_parser.h b/mv_machine_learning/face_recognition/include/facenet_parser.h
new file mode 100644 (file)
index 0000000..1f4e0ea
--- /dev/null
@@ -0,0 +1,43 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FACENET_PARSER_H__
+#define __FACENET_PARSER_H__
+
+#include "MetaParser.h"
+#include "PostprocessParser.h"
+
+namespace mediavision
+{
+namespace machine_learning
+{
+class FacenetParser : public MetaParser
+{
+private:
+       PostprocessParser _postprocessParser;
+
+protected:
+       void parsePostprocess(std::shared_ptr<MetaInfo> meta_info, JsonObject *in_obj) override;
+
+public:
+       FacenetParser();
+       ~FacenetParser();
+};
+
+}
+}
+
+#endif
\ No newline at end of file
index 3d0bfd2e1a437c68c1e04b9a8ce388d1b45db359..1076f4c398b157e25f78a2c65c75735112e6896f 100644 (file)
@@ -2,17 +2,37 @@
     "attributes":
     [
         {
-            "name"  : "MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH",
+            "name"  : "FACENET_MODEL_FILE_PATH",
             "type"  : "string",
             "value" : "/home/owner/media/res/face_recognition/backbone/facenet.tflite"
         },
         {
-            "name"  : "MV_FACE_RECOGNITION_DEFAULT_PATH",
+            "name"  : "FACENET_MODEL_META_FILE_PATH",
+            "type"  : "string",
+            "value" : "/home/owner/media/res/face_recognition/backbone/facenet.json"
+        },
+        {
+            "name"  : "FACE_RECOGNITION_DEFAULT_PATH",
             "type"  : "string",
             "value" : "/home/owner/media/res/face_recognition/training/"
         },
         {
-            "name"  : "MV_FACE_RECOGNITION_DECISION_THRESHOLD",
+            "name"  : "FACENET_MODEL_BACKEND_TYPE",
+            "type"  : "integer",
+            "value" : 1
+        },
+        {
+            "name"  : "FACENET_MODEL_TARGET_DEVICE_TYPE",
+            "type"  : "integer",
+            "value" : 1
+        },
+        {
+            "name"  : "FACENET_OUTPUT_TENSOR_NAME",
+            "type"  : "string",
+            "value" : "normalize/l2_normalize"
+        },
+        {
+            "name"  : "FACE_RECOGNITION_DECISION_THRESHOLD",
             "type"  : "double",
             "value" : -0.85
         }
diff --git a/mv_machine_learning/face_recognition/src/face_net_info.cpp b/mv_machine_learning/face_recognition/src/face_net_info.cpp
deleted file mode 100644 (file)
index 888e51b..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "face_net_info.h"
-
-using namespace std;
-
-FaceNetInfo::FaceNetInfo(string model_file_path)
-{
-       _model_file_path = model_file_path;
-
-       const string input_layer_name = { "input_1" };
-       const inference_engine_tensor_info input_tensor_info = {
-               { 160, 160, 3, 1 }, INFERENCE_TENSOR_SHAPE_NCHW, INFERENCE_TENSOR_DATA_TYPE_FLOAT32, (size_t)(1 * 3 * 160 * 160)
-       };
-
-       model_layer_info input_info = { input_layer_name, input_tensor_info };
-       _input_layer_info.push_back(input_info);
-
-       const string output_layer_name = { "normalize/l2_normalize" };
-       const inference_engine_tensor_info output_tensor_info = {
-               { 512, 1, 1, 1 }, INFERENCE_TENSOR_SHAPE_NCHW, INFERENCE_TENSOR_DATA_TYPE_FLOAT32, (size_t)(1 * 512)
-       };
-
-       model_layer_info output_info = { output_layer_name, output_tensor_info };
-       _output_layer_info.push_back(output_info);
-}
-
-FaceNetInfo::~FaceNetInfo()
-{
-       _input_layer_info.clear();
-       _output_layer_info.clear();
-}
-
-string FaceNetInfo::GetModelFilePath()
-{
-       return _model_file_path;
-}
-
-vector<model_layer_info> &FaceNetInfo::GetInputLayerInfo()
-{
-       return _input_layer_info;
-}
-
-vector<model_layer_info> &FaceNetInfo::GetOutputLayerInfo()
-{
-       return _output_layer_info;
-}
\ No newline at end of file
index 212a5e082d472cd66f09ff6e87ab5b48d4a3c26a..5b3afdbd80beef517108fdd8023e7f6a6f6968ac 100644 (file)
@@ -32,7 +32,6 @@
 #include "face_recognition.h"
 #include "nntrainer_fvm.h"
 #include "nntrainer_dsm.h"
-#include "face_net_info.h"
 #include "file_util.h"
 
 using namespace std;
@@ -188,54 +187,12 @@ int FaceRecognition::GetVecFromMvSource(mv_source_h img_src, std::vector<float>
 
 int FaceRecognition::Initialize()
 {
-       _backbone_model_info = make_unique<FaceNetInfo>(_config.backbone_model_file_path);
-
-       if (_backbone_model_info->GetInputLayerInfo().empty() || _backbone_model_info->GetInputLayerInfo().size() > 1) {
-               LOGE("Invalid input layer size - input layer size should be 1.");
-               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
-       }
-
-       if (_backbone_model_info->GetOutputLayerInfo().empty() || _backbone_model_info->GetOutputLayerInfo().size() > 1) {
-               LOGE("Invalid output layer size - output layer size should be 1.");
-               return MEDIA_VISION_ERROR_INVALID_PARAMETER;
-       }
-
-       vector<string> input_layer_names, output_layer_names;
-
-       for (auto &input : _backbone_model_info->GetInputLayerInfo())
-               input_layer_names.push_back(input.layer_name);
-
-       for (auto &output : _backbone_model_info->GetOutputLayerInfo())
-               output_layer_names.push_back(output.layer_name);
-
-       // Initialize inference engine object for backbone model.
-       _backbone = make_unique<Inference>();
-
-       int ret = _backbone->Bind(_config.backbone_engine_backend_type, _config.backbone_target_device_type);
-       if (ret != MEDIA_VISION_ERROR_NONE)
-               return ret;
-
-       // Tensor order is NCHW.
-       vector<model_layer_info> &input_layer_info = GetBackboneInputLayerInfo();
-       size_t width = input_layer_info[0].tensor_info.shape[0];
-       size_t height = input_layer_info[0].tensor_info.shape[1];
-       size_t ch = input_layer_info[0].tensor_info.shape[2];
-       vector<inference_engine_tensor_info> output_tensor_info;
-
-       _backbone->ConfigureInputInfo(width, height, 1, ch, 127.5f, 127.5f, MV_INFERENCE_DATA_FLOAT32, input_layer_names);
-       _backbone->ConfigureOutputInfo(output_layer_names, output_tensor_info);
-       _backbone->ConfigureModelFiles("", _backbone_model_info->GetModelFilePath(), "");
-
-       ret = _backbone->Load();
-       if (ret != MEDIA_VISION_ERROR_NONE)
-               return ret;
-
        _training_model = make_unique<SimpleShot>(_config.training_engine_backend_type, _config.training_target_device_type,
                                                                                          _config.internal_model_file_path);
 
        _internal = make_unique<Inference>();
 
-       ret = _internal->Bind(_config.inference_engine_backend_type, _config.inference_target_device_type);
+       int ret = _internal->Bind(_config.inference_engine_backend_type, _config.inference_target_device_type);
        if (ret != MEDIA_VISION_ERROR_NONE)
                return ret;
 
@@ -261,10 +218,8 @@ void FaceRecognition::ImportLabel()
        }
 }
 
-int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
+int FaceRecognition::RegisterNewFace(std::vector<float> &input_vec, string label_name)
 {
-       vector<model_layer_info> &output_layer_info = _backbone_model_info->GetOutputLayerInfo();
-
        if (_status < INITIALIZED) {
                LOGE("Initialization not ready yet. (%u)", _status);
                return MEDIA_VISION_ERROR_INVALID_OPERATION;
@@ -283,30 +238,6 @@ int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
                                return MEDIA_VISION_ERROR_INVALID_OPERATION;
                }
 
-               std::vector<mv_source_h> backbone_sources { img_src };
-               std::vector<mv_rectangle_s> rects;
-
-               int ret = _backbone->Run(backbone_sources, rects);
-               if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-                       LOGE("fail to inference backbone model.");
-                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
-               }
-
-               // 2. Get feature vector from a given vec through inference engine.
-               // Ps. output layer size should be 1.
-               TensorBuffer tensorBuffer = _backbone->GetOutputTensorBuffer();
-               inference_engine_tensor_buffer *backbone_output_buffer =
-                               tensorBuffer.getTensorBuffer(output_layer_info[0].layer_name);
-               if (!backbone_output_buffer) {
-                       LOGE("fail to get output tensor buffer.");
-                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
-               }
-
-               vector<float> feature_vec;
-               auto buffer = static_cast<float *>(backbone_output_buffer->buffer);
-
-               copy(buffer, buffer + backbone_output_buffer->size / sizeof(float), back_inserter(feature_vec));
-
                // Get label index and count.
                unsigned int label_idx = _label_manager->GetLabelIndex(label_name);
                unsigned int label_cnt = _label_manager->GetMaxLabel();
@@ -324,7 +255,7 @@ int FaceRecognition::RegisterNewFace(mv_source_h img_src, string label_name)
                }
 
                // Add new feature vectors.
-               data_set->AddDataSet(feature_vec, label_idx, label_cnt);
+               data_set->AddDataSet(input_vec, label_idx, label_cnt);
 
                _training_model->ApplyDataSet(data_set);
                _training_model->Compile();
@@ -388,7 +319,7 @@ int FaceRecognition::GetAnswer()
        return MEDIA_VISION_ERROR_NONE;
 }
 
-int FaceRecognition::RecognizeFace(mv_source_h img_src)
+int FaceRecognition::RecognizeFace(std::vector<float> &input_vec)
 {
        if (_status < INITIALIZED) {
                LOGE("Initialization not ready yet.(%u)", _status);
@@ -417,38 +348,6 @@ int FaceRecognition::RecognizeFace(mv_source_h img_src)
                // Import label data from a label file.
                ImportLabel();
 
-               if (_backbone_model_info->GetInputLayerInfo().empty() || _backbone_model_info->GetInputLayerInfo().size() > 1) {
-                       LOGE("Invalid input layer size - input layer size should be 1.");
-                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
-               }
-
-               if (_backbone_model_info->GetOutputLayerInfo().empty() ||
-                       _backbone_model_info->GetOutputLayerInfo().size() > 1) {
-                       LOGE("Invalid output layer size - output layer size should be 1.");
-                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
-               }
-
-               std::vector<mv_source_h> backbone_sources { img_src };
-               std::vector<mv_rectangle_s> backbone_rects;
-
-               // Do inference to backbone model to get feature vector.
-               int ret = _backbone->Run(backbone_sources, backbone_rects);
-               if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-                       LOGE("fail to inference backbone model.");
-                       return MEDIA_VISION_ERROR_INVALID_OPERATION;
-               }
-
-               // Get output layer info for facenet model.
-               vector<model_layer_info> &output_layer_info = _backbone_model_info->GetOutputLayerInfo();
-               // Get output tensor buffer to the output layer.
-               TensorBuffer tensorBuffer = _backbone->GetOutputTensorBuffer();
-               inference_engine_tensor_buffer *backbone_output_buffer =
-                               tensorBuffer.getTensorBuffer(output_layer_info[0].layer_name);
-               if (!backbone_output_buffer) {
-                       LOGE("fail to get backbone output tensor buffer.");
-                       return MEDIA_VISION_ERROR_INVALID_PARAMETER;
-               }
-
                // Tensor order is NCHW.
                size_t width = input_tensor_info[0].shape[0];
                size_t height = input_tensor_info[0].shape[1];
@@ -462,23 +361,23 @@ int FaceRecognition::RecognizeFace(mv_source_h img_src)
                _internal->ConfigureModelFiles("", _config.internal_model_file_path, "");
 
                // Load the trained internal model.
-               ret = _internal->Load();
+               int ret = _internal->Load();
                if (ret != INFERENCE_ENGINE_ERROR_NONE) {
                        LOGE("Fail to Load.");
                        return MEDIA_VISION_ERROR_INVALID_OPERATION;
                }
 
-               std::vector<void *> raw_buffers { static_cast<void *>(backbone_output_buffer) };
+               std::vector<std::vector<float>> input_tensors = { input_vec };
 
                // Do inference to the internal model.
-               ret = _internal->Run(raw_buffers);
+               ret = _internal->Run(input_tensors);
                if (ret != INFERENCE_ENGINE_ERROR_NONE) {
                        LOGE("fail to inference internal model.");
                        return MEDIA_VISION_ERROR_INVALID_OPERATION;
                }
 
                // output layer size should be 1.
-               tensorBuffer = _internal->GetOutputTensorBuffer();
+               TensorBuffer tensorBuffer = _internal->GetOutputTensorBuffer();
                inference_engine_tensor_buffer *internal_output_buffer = tensorBuffer.getTensorBuffer(output_layers[0]);
                if (!internal_output_buffer) {
                        LOGE("fail to get internal output tensor buffer.");
index 79619011149044df8709a34cc370af27a9174d95..30a847a48d494238084b85d612ee30f7661b97f7 100644 (file)
@@ -16,8 +16,7 @@
 
 #include "machine_learning_exception.h"
 #include "face_recognition_adapter.h"
-
-#define FACE_RECOGNITION_META_FILE_NAME "face_recognition.json"
+#include "face_recognition_type.h"
 
 using namespace std;
 using namespace MediaVision::Common;
@@ -44,16 +43,10 @@ template<typename T, typename V> void FaceRecognitionAdapter<T, V>::create(int t
 template<typename T, typename V> void FaceRecognitionAdapter<T, V>::configure()
 {
        _config = make_unique<EngineConfig>(string(MV_CONFIG_PATH) + string(FACE_RECOGNITION_META_FILE_NAME));
-       string backboneModelFilePath;
-       int ret = _config->getStringAttribute(string(MV_FACE_RECOGNITION_BACKBONE_MODEL_FILE_PATH), &backboneModelFilePath);
-       if (ret != MEDIA_VISION_ERROR_NONE)
-               throw InvalidParameter("Failed to get an attribute");
-
-       LOGD("Backbone model file path : %s", backboneModelFilePath.c_str());
 
        string defaultPath;
 
-       ret = _config->getStringAttribute(string(MV_FACE_RECOGNITION_DEFAULT_PATH), &defaultPath);
+       int ret = _config->getStringAttribute(string(MV_FACE_RECOGNITION_DEFAULT_PATH), &defaultPath);
        if (ret != MEDIA_VISION_ERROR_NONE)
                throw InvalidOperation("Fail to get default path.");
 
@@ -69,9 +62,6 @@ template<typename T, typename V> void FaceRecognitionAdapter<T, V>::configure()
                MV_INFERENCE_BACKEND_NNTRAINER, // not used and default type is used. See TrainingModel()
                MV_INFERENCE_TARGET_DEVICE_CPU,
                MV_INFERENCE_BACKEND_NNTRAINER,
-               MV_INFERENCE_TARGET_DEVICE_CPU,
-               MV_INFERENCE_BACKEND_TFLITE,
-               backboneModelFilePath,
                string(defaultPath) + "model_and_weights.ini",
                string(defaultPath) + "labels.dat",
                string(defaultPath) + "feature_vector_file.dat",
@@ -97,7 +87,7 @@ template<typename T, typename V> void FaceRecognitionAdapter<T, V>::perform()
 {
        if (_source.mode == mode::REGISTER) {
                for (auto &s : _source.register_src) {
-                       int ret = _face_recognition->RegisterNewFace(s.first, s.second);
+                       int ret = _face_recognition->RegisterNewFace(s.data, s.label);
                        if (ret != MEDIA_VISION_ERROR_NONE)
                                throw InvalidOperation("Fail to register new face.");
                }
@@ -106,7 +96,8 @@ template<typename T, typename V> void FaceRecognitionAdapter<T, V>::perform()
        }
 
        if (_source.mode == mode::INFERENCE) {
-               int ret = _face_recognition->RecognizeFace(_source.inference_src);
+               // _source.inputs.size should be 1.
+               int ret = _face_recognition->RecognizeFace(_source.inputs[0]);
                if (ret == MEDIA_VISION_ERROR_NO_DATA)
                        throw NoData("Label not found.");
 
diff --git a/mv_machine_learning/face_recognition/src/facenet.cpp b/mv_machine_learning/face_recognition/src/facenet.cpp
new file mode 100644 (file)
index 0000000..93c6405
--- /dev/null
@@ -0,0 +1,160 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <map>
+#include <memory>
+#include <algorithm>
+
+#include "machine_learning_exception.h"
+#include "facenet.h"
+#include "face_recognition_type.h"
+
+using namespace std;
+using namespace mediavision::inference;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+Facenet::Facenet() : _backendType(), _targetDeviceType()
+{
+       _inference = make_unique<Inference>();
+       _parser = make_unique<FacenetParser>();
+}
+
+static bool IsJsonFile(const string &fileName)
+{
+       return (!fileName.substr(fileName.find_last_of(".") + 1).compare("json"));
+}
+
+void Facenet::parseMetaFile()
+{
+       _config = make_unique<EngineConfig>(string(MV_CONFIG_PATH) + string(FACE_RECOGNITION_META_FILE_NAME));
+
+       int ret = _config->getIntegerAttribute(string(MV_FACENET_BACKEND_TYPE), &_backendType);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get backend engine type.");
+
+       ret = _config->getIntegerAttribute(string(MV_FACENET_TARGET_DEVICE_TYPE), &_targetDeviceType);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get target device type.");
+
+       ret = _config->getStringAttribute(MV_FACENET_MODEL_FILE_PATH, &_modelFilePath);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get model file path.");
+
+       ret = _config->getStringAttribute(MV_FACENET_MODEL_META_FILE_PATH, &_modelMetaFilePath);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get model meta file path.");
+
+       ret = _config->getStringAttribute(MV_FACENET_OUTPUT_TENSOR_NAME, &_facenetOutputTensorName);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to get facenet output tensor name.");
+
+       if (_modelMetaFilePath.empty())
+               throw InvalidOperation("Model meta file doesn't exist.");
+
+       if (!IsJsonFile(_modelMetaFilePath))
+               throw InvalidOperation("Model meta file should be json.");
+
+       _parser->load(_modelMetaFilePath);
+}
+
+void Facenet::configure()
+{
+       int ret = _inference->Bind(_backendType, _targetDeviceType);
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to bind a backend engine.");
+}
+
+void Facenet::prepare()
+{
+       int ret = _inference->configureInputMetaInfo(_parser->getInputMetaMap());
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to configure input tensor info from meta file.");
+
+       ret = _inference->configureOutputMetaInfo(_parser->getOutputMetaMap());
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to configure output tensor info from meta file.");
+
+       _inference->ConfigureModelFiles("", _modelFilePath, "");
+
+       // Request to load model files to a backend engine.
+       ret = _inference->Load();
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to load model files.");
+}
+void Facenet::preprocess(mv_source_h &mv_src)
+{
+       LOGI("ENTER");
+
+       TensorBuffer &tensor_buffer_obj = _inference->getInputTensorBuffer();
+       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
+       vector<mv_source_h> mv_srcs = { mv_src };
+
+       _preprocess.run(mv_srcs, _parser->getInputMetaMap(), ie_tensor_buffer);
+
+       LOGI("LEAVE");
+}
+
+void Facenet::inference(mv_source_h source)
+{
+       LOGI("ENTER");
+
+       vector<mv_source_h> sources;
+
+       sources.push_back(source);
+
+       int ret = _inference->Run();
+       if (ret != MEDIA_VISION_ERROR_NONE)
+               throw InvalidOperation("Fail to run inference.");
+
+       LOGI("LEAVE");
+}
+
+facenet_output_s &Facenet::getResult()
+{
+       TensorBuffer &tensor_buffer_obj = _inference->GetOutputTensorBuffer();
+       IETensorBuffer &ie_tensor_buffer = tensor_buffer_obj.getIETensorBuffer();
+
+       // Clear _result.outputs before collecting a new result; otherwise previously pushed
+       // output vectors would accumulate and a wrong vector could be sent to the face recognition framework.
+       _result.outputs.clear();
+
+       for (IETensorBuffer::iterator it = ie_tensor_buffer.begin(); it != ie_tensor_buffer.end(); it++) {
+               if (it->first.compare(_facenetOutputTensorName) == 0) {
+                       _outputTensorBuffer = tensor_buffer_obj.getTensorBuffer(it->first);
+                       if (!_outputTensorBuffer)
+                               throw InvalidOperation("Fail to get output tensor buffer.");
+
+                       vector<float> output_vector;
+                       float *buffer = reinterpret_cast<float *>(_outputTensorBuffer->buffer);
+
+                       copy(&buffer[0], &buffer[_outputTensorBuffer->size / sizeof(float)], back_inserter(output_vector));
+                       _result.outputs.push_back(output_vector);
+
+                       return _result;
+               }
+       }
+
+       throw InvalidOperation("No output tensor.");
+}
+
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/face_recognition/src/facenet_adapter.cpp b/mv_machine_learning/face_recognition/src/facenet_adapter.cpp
new file mode 100644 (file)
index 0000000..a85c7bc
--- /dev/null
@@ -0,0 +1,83 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "machine_learning_exception.h"
+#include "facenet_adapter.h"
+
+using namespace std;
+using namespace MediaVision::Common;
+using namespace mediavision::machine_learning;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+template<typename T, typename V> FacenetAdapter<T, V>::FacenetAdapter() : _source()
+{
+       _facenet = make_unique<Facenet>();
+}
+
+template<typename T, typename V> FacenetAdapter<T, V>::~FacenetAdapter()
+{}
+
+template<typename T, typename V> void FacenetAdapter<T, V>::create(int type)
+{
+       throw InvalidOperation("Not support yet.");
+}
+
+template<typename T, typename V> void FacenetAdapter<T, V>::configure()
+{
+       try {
+               _facenet->parseMetaFile();
+               _facenet->configure();
+       } catch (const BaseException &e) {
+               throw e;
+       }
+}
+
+template<typename T, typename V> void FacenetAdapter<T, V>::prepare()
+{
+       try {
+               _facenet->prepare();
+       } catch (const BaseException &e) {
+               throw e;
+       }
+}
+
+template<typename T, typename V> void FacenetAdapter<T, V>::setInput(T &t)
+{
+       _source = t;
+}
+
+template<typename T, typename V> void FacenetAdapter<T, V>::perform()
+{
+       try {
+               _facenet->preprocess(_source.inputs[0]);
+               _facenet->inference(_source.inputs[0]);
+       } catch (const BaseException &e) {
+               throw e;
+       }
+}
+
+template<typename T, typename V> V &FacenetAdapter<T, V>::getOutput()
+{
+       return _facenet->getResult();
+}
+
+template class FacenetAdapter<facenet_input_s, facenet_output_s>;
+}
+}
\ No newline at end of file
diff --git a/mv_machine_learning/face_recognition/src/facenet_parser.cpp b/mv_machine_learning/face_recognition/src/facenet_parser.cpp
new file mode 100644 (file)
index 0000000..b04d561
--- /dev/null
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "machine_learning_exception.h"
+#include "facenet_parser.h"
+
+using namespace std;
+using namespace mediavision::machine_learning::exception;
+
+namespace mediavision
+{
+namespace machine_learning
+{
+FacenetParser::FacenetParser()
+{
+       LOGI("ENTER");
+       LOGI("LEAVE");
+}
+
+FacenetParser::~FacenetParser()
+{}
+
+void FacenetParser::parsePostprocess(shared_ptr<MetaInfo> meta_info, JsonObject *in_obj)
+{
+       LOGI("ENTER");
+
+       LOGI("tensor name : %s", meta_info->name.c_str());
+
+       if (json_object_has_member(in_obj, "box"))
+               _postprocessParser.parseBox(meta_info, in_obj);
+
+       LOGI("LEAVE");
+}
+
+}
+}
\ No newline at end of file
index 5d305f30f71b2c805e96c4587f1b08240a30eadd..1b5042b4d89338a8b9c33df29e0ae5ed4603284f 100644 (file)
@@ -19,6 +19,7 @@
 #include <memory>
 
 #include "face_recognition_adapter.h"
+#include "facenet_adapter.h"
 #include "mv_face_recognition_open.h"
 #include "machine_learning_exception.h"
 #include "context.h"
@@ -29,6 +30,7 @@ using namespace mediavision::machine_learning;
 using namespace mediavision::machine_learning::face_recognition;
 using namespace mediavision::machine_learning::exception;
 using FaceRecognitionTask = ITask<mv_face_recognition_input_s, mv_face_recognition_result_s>;
+using FacenetTask = ITask<facenet_input_s, facenet_output_s>;
 
 int mv_face_recognition_create_open(mv_face_recognition_h *handle)
 {
@@ -43,9 +45,18 @@ int mv_face_recognition_create_open(mv_face_recognition_h *handle)
                return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
        }
 
-       FaceRecognitionTask *task = new (nothrow)
+       FaceRecognitionTask *face_recognition_task = new (nothrow)
                        FaceRecognitionAdapter<mv_face_recognition_input_s, mv_face_recognition_result_s>();
-       if (!task) {
+       if (!face_recognition_task) {
+               delete context;
+               LOGE("Fail to allocate a task.");
+               return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
+       }
+
+       FacenetTask *facenet_task = new (nothrow)
+                       FacenetAdapter<facenet_input_s, facenet_output_s>();
+       if (!facenet_task) {
+               delete face_recognition_task;
                delete context;
                LOGE("Fail to allocate a task.");
                return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
@@ -53,8 +64,19 @@ int mv_face_recognition_create_open(mv_face_recognition_h *handle)
 
        pair<map<string, void *>::iterator, bool> result;
 
-       result = context->__tasks.insert(pair<string, void *>("face_recognition", task));
+       result = context->__tasks.insert(pair<string, void *>("face_recognition", face_recognition_task));
        if (!result.second) {
+               delete facenet_task;
+               delete face_recognition_task;
+               delete context;
+               LOGE("Fail to register a new task. Same task already exists.");
+               return MEDIA_VISION_ERROR_INVALID_OPERATION;
+       }
+
+       result = context->__tasks.insert(pair<string, void *>("facenet", facenet_task));
+       if (!result.second) {
+               delete facenet_task;
+               delete face_recognition_task;
                delete context;
                LOGE("Fail to register a new task. Same task already exists.");
                return MEDIA_VISION_ERROR_INVALID_OPERATION;
@@ -78,8 +100,15 @@ int mv_face_recognition_destroy_open(mv_face_recognition_h handle)
        map<string, void *>::iterator iter;
 
        for (iter = context->__tasks.begin(); iter != context->__tasks.end(); ++iter) {
-               auto task = static_cast<FaceRecognitionTask *>(iter->second);
-               delete task;
+               if (iter->first.compare("face_recognition") == 0) {
+                       auto face_recognition_task = static_cast<FaceRecognitionTask *>(iter->second);
+                       delete face_recognition_task;
+               }
+
+               if (iter->first.compare("facenet") == 0) {
+                       auto facenet_task = static_cast<FacenetTask *>(iter->second);
+                       delete facenet_task;
+               }
        }
 
        delete context;
@@ -100,10 +129,13 @@ int mv_face_recognition_prepare_open(mv_face_recognition_h handle)
 
        try {
                Context *context = static_cast<Context *>(handle);
-               auto task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+               auto face_recognition_task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+               auto facenet_task = static_cast<FacenetTask *>(context->__tasks["facenet"]);
 
-               task->configure();
-               task->prepare();
+               face_recognition_task->configure();
+               facenet_task->configure();
+               face_recognition_task->prepare();
+               facenet_task->prepare();
        } catch (const BaseException &e) {
                LOGE("%s", e.what());
                return e.getError();
@@ -125,14 +157,22 @@ int mv_face_recognition_register_open(mv_face_recognition_h handle, mv_source_h
 
        try {
                Context *context = static_cast<Context *>(handle);
-               auto task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+               auto face_recognition_task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+               auto facenet_task = static_cast<FacenetTask *>(context->__tasks["facenet"]);
+
+               facenet_input_s facenet_input = { { source } };
 
-               mv_face_recognition_input_s input = { mode::REGISTER };
+               facenet_task->setInput(facenet_input);
+               facenet_task->perform();
 
-               input.register_src.clear();
-               input.register_src.insert(make_pair(source, string(label)));
-               task->setInput(input);
-               task->perform();
+               facenet_output_s &facenet_output = facenet_task->getOutput();
+               mv_face_recognition_input_s face_recognition_input = { .mode = mode::REGISTER };
+               face_recognition_register_input_s facenet_to_face_reg_input = { facenet_output.outputs[0], string(label) };
+
+               face_recognition_input.register_src.clear();
+               face_recognition_input.register_src.push_back(facenet_to_face_reg_input);
+               face_recognition_task->setInput(face_recognition_input);
+               face_recognition_task->perform();
        } catch (const BaseException &e) {
                LOGE("%s", e.what());
                return e.getError();
@@ -154,14 +194,14 @@ int mv_face_recognition_unregister_open(mv_face_recognition_h handle, const char
 
        try {
                Context *context = static_cast<Context *>(handle);
-               auto task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+               auto face_recognition_task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
 
                mv_face_recognition_input_s input = { mode::DELETE };
 
                input.labels.clear();
                input.labels.push_back(string(label));
-               task->setInput(input);
-               task->perform();
+               face_recognition_task->setInput(input);
+               face_recognition_task->perform();
        } catch (const BaseException &e) {
                LOGE("%s", e.what());
                return e.getError();
@@ -183,13 +223,20 @@ int mv_face_recognition_inference_open(mv_face_recognition_h handle, mv_source_h
 
        try {
                Context *context = static_cast<Context *>(handle);
-               auto task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+               auto face_recognition_task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+               auto facenet_task = static_cast<FacenetTask *>(context->__tasks["facenet"]);
+
+               facenet_input_s facenet_input = { { source } };
+
+               facenet_task->setInput(facenet_input);
+               facenet_task->perform();
+               facenet_output_s &facenet_output = facenet_task->getOutput();
 
-               mv_face_recognition_input_s input = { mode::INFERENCE };
+               mv_face_recognition_input_s face_recognition_input = { mode::INFERENCE };
 
-               input.inference_src = source;
-               task->setInput(input);
-               task->perform();
+               face_recognition_input.inputs = facenet_output.outputs;
+               face_recognition_task->setInput(face_recognition_input);
+               face_recognition_task->perform();
        } catch (const BaseException &e) {
                LOGE("%s", e.what());
                return e.getError();
@@ -211,9 +258,9 @@ int mv_face_recognition_get_label_open(mv_face_recognition_h handle, const char
 
        try {
                Context *context = static_cast<Context *>(handle);
-               auto task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
+               auto face_recognition_task = static_cast<FaceRecognitionTask *>(context->__tasks["face_recognition"]);
 
-               *out_label = task->getOutput().label.c_str();
+               *out_label = face_recognition_task->getOutput().label.c_str();
        } catch (const BaseException &e) {
                LOGE("%s", e.what());
                return e.getError();
index 4cfc73e51da6747506d8eada536d29a393ac23dc..b8093d61d36b54fc42957323cc1e8a5256e48b01 100644 (file)
@@ -258,17 +258,7 @@ public:
                 */
        int Run(std::vector<mv_source_h> &mvSources, std::vector<mv_rectangle_s> &rects);
 
-       /**
-                * @brief       Runs inference with a region of a given image
-                * @details Use this function to run forward pass with the given image.
-                *          The given image is preprocessed and the region of the image is
-                *          thrown to neural network. Then, the output tensor is returned.
-                *          If roi is NULL, then full source will be analyzed.
-                *
-                * @since_tizen 7.0
-                * @return @c true on success, otherwise a negative error value
-                */
-       int Run(std::vector<void *> &buffer_objs);
+       int Run(std::vector<std::vector<float>> &input_tensors);
 
        int Run();
 
index 320c116013a2e50dca65ef8ac46ef35df8aef751..5f5c40c360881448eae425ed77f175e73938e82c 100644 (file)
@@ -1020,7 +1020,7 @@ int Inference::Run(std::vector<mv_source_h> &mvSources, std::vector<mv_rectangle
        return ConvertOutputDataTypeToFloat();
 }
 
-int Inference::Run(std::vector<void *> &buffer_objs)
+int Inference::Run(std::vector<std::vector<float>> &input_tensors)
 {
        int ret = INFERENCE_ENGINE_ERROR_NONE;
 
@@ -1029,18 +1029,18 @@ int Inference::Run(std::vector<void *> &buffer_objs)
                return MEDIA_VISION_ERROR_INVALID_OPERATION;
        }
 
-       if (buffer_objs.empty()) {
+       if (input_tensors.empty()) {
                LOGE("cvSources should contain only one cv source.");
                return MEDIA_VISION_ERROR_INVALID_PARAMETER;
        }
 
        // We are able to request Only one input data for the inference as of now.
-       if (buffer_objs.size() > 1) {
+       if (input_tensors.size() > 1) {
                LOGE("It allows only one source for the inference.");
                return MEDIA_VISION_ERROR_INVALID_PARAMETER;
        }
 
-       if (mInputTensorBuffers.getIETensorBuffer().size() != buffer_objs.size()) {
+       if (mInputTensorBuffers.getIETensorBuffer().size() != input_tensors.size()) {
                LOGE("Raw source count is not invalid.");
                return MEDIA_VISION_ERROR_INVALID_PARAMETER;
        }
@@ -1049,17 +1049,17 @@ int Inference::Run(std::vector<void *> &buffer_objs)
 
        for (auto &buffer : mInputTensorBuffers.getIETensorBuffer()) {
                inference_engine_tensor_buffer &tensor_buffer = buffer.second;
-               inference_engine_tensor_buffer *buffer_obj =
-                               static_cast<inference_engine_tensor_buffer *>(buffer_objs[buffer_idx]);
+               std::vector<float> &input_tensor = input_tensors[buffer_idx];
+               const size_t input_tensor_size = input_tensor.size() * sizeof(float);
 
-               if (tensor_buffer.size != buffer_obj->size) {
-                       LOGE("Raw buffer size is invalid.");
+               if (tensor_buffer.size != input_tensor_size) {
+                       LOGE("Raw buffer size is invalid.(%zu vs %zu)", tensor_buffer.size, input_tensor_size);
                        return MEDIA_VISION_ERROR_INVALID_PARAMETER;
                }
 
-               LOGI("A number of tensor bytes : %zu", buffer_obj->size);
+               LOGI("A number of tensor bytes : %zu", input_tensor_size);
 
-               memcpy(tensor_buffer.buffer, buffer_obj->buffer, tensor_buffer.size);
+               memcpy(tensor_buffer.buffer, input_tensor.data(), input_tensor_size);
                buffer_idx++;
        }